From 0a4915abf741a4f7f8017db24057234eae4b9738 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 16 Jun 2022 10:46:39 +0530
Subject: [PATCH 01/59] create an app to detect licenses from input text

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scancodeio/settings.py                        |   3 +-
 scancodeio/static/favicon.ico                 | Bin 0 -> 15406 bytes
 scancodeio/urls.py                            |   1 +
 scanpipe/templates/scanpipe/base.html         |   1 +
 scantext/__init__.py                          |  21 ++
 scantext/admin.py                             |  25 ++
 scantext/apps.py                              |  28 ++
 scantext/forms.py                             |  64 ++++
 scantext/migrations/__init__.py               |   0
 scantext/models.py                            |  25 ++
 .../includes/license_summary_level.html       |  35 ++
 .../templates/scantext/license_detail.html    | 107 ++++++
 scantext/templates/scantext/license_scan.html |  84 +++++
 scantext/tests.py                             |  25 ++
 scantext/urls.py                              |  29 ++
 scantext/views.py                             | 312 ++++++++++++++++++
 16 files changed, 759 insertions(+), 1 deletion(-)
 create mode 100644 scancodeio/static/favicon.ico
 create mode 100644 scantext/__init__.py
 create mode 100644 scantext/admin.py
 create mode 100644 scantext/apps.py
 create mode 100644 scantext/forms.py
 create mode 100644 scantext/migrations/__init__.py
 create mode 100644 scantext/models.py
 create mode 100644 scantext/templates/scantext/includes/license_summary_level.html
 create mode 100644 scantext/templates/scantext/license_detail.html
 create mode 100644 scantext/templates/scantext/license_scan.html
 create mode 100644 scantext/tests.py
 create mode 100644 scantext/urls.py
 create mode 100644 scantext/views.py
diff --git a/scancodeio/settings.py b/scancodeio/settings.py
index 665dd3eb5..63c9dc4da 100644
--- a/scancodeio/settings.py
+++ b/scancodeio/settings.py
@@ -45,7 +45,7 @@
 ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=[".localhost", "127.0.0.1", "[::1]"])
 
 # SECURITY WARNING: don't run with debug turned on in production
-DEBUG = env.bool("SCANCODEIO_DEBUG", default=False)
+DEBUG = env.bool("SCANCODEIO_DEBUG", default=False)
 
 SCANCODEIO_REQUIRE_AUTHENTICATION = env.bool(
     "SCANCODEIO_REQUIRE_AUTHENTICATION", default=False
@@ -78,6 +78,7 @@
     # Local apps
     # Must come before Third-party apps for proper templates override
     "scanpipe",
+    "scantext",
     # Django built-in
     "django.contrib.auth",
     "django.contrib.contenttypes",
diff --git a/scancodeio/static/favicon.ico b/scancodeio/static/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..e459dba5ac9a21d6dc81430b10b8d4da5a7d366b
GIT binary patch
literal 15406
zcmeHO36vJa5$>Mp?w><oxkUr4NXQBXksx>kMTq*8XHXI}L=pvKf+A>Q@M6IQyigHQ
zNEC4+AP6FYpooI7<RItM#2C$eX^2TojEUheA(gLscCLTszt~-tefjsjVdn4d>Z-1;
zuBxsogpk4&6%_*SN-<dpF;oaqS?T?*>L^4l+6E1(@9!tXuC78<VGKY3;kDzH`e*8B
zR%R#d$3rDP`x^l3<2o3;K5XJ`*8h?8qdz9I`R|FoOx=IUJpL;(D}P3r9sh~-n`J*C
zIp7omgGe|I38l%s=u5;f%oFpqS4bW+!0X2t*{vt(2fh)^VV1u}&Uq8btobcq4v@Ze
zy&bFXd7d&a>>=5`7daz7N#^N4lX>o4a;MBE^Th80_ypgd{}%~Yk-B~<so76}CYQ{n
zw{wrL-#cEvy5@e8eLhU0ZF@_PJFy1%dl!UNQ|=|9GbB28Bj^0jlUemk3sVd{uZ#KS
zPMSlR?f;J9Qgfd4=->F4SbKEMX5-s-jGB5MnHTqyUivb*6K=*_*r%;~Ngi2A&hQJ#
ztbdElv+t0)c9G?4bPhFbA&E@uU=N)U;~)p`TAXItt0a#d81CQEeaY1RFiFpr_sA@N
zoy;?TOZM%A{9u30>R$(P5yjtt?}nU#hMYkQo2nE_Xk<N8iDv{J)=xSAYREA=sTzFR
z-2h&t>`Pv~wVQE@5O=BYjWWI&)}2lFq2zfs?(*-_ZR;W9nR#kA>ASa(nz@ox%^K1V
zeUCCPzF!=E*ipUY2Nq7vS_PZ<buzGB#hh<FcOA*&h6XwrdL#Vq(8aSQx#-w&05;$O
z5?zlDVSM;jZOFagsvJ5Tx&rpeIeS79j%^?8Y?Kb?<k1OVA+!B`a!wx`8yAJ~zh%!4
zQD)ozBro7oIOk5Z@a#vZFE1gp;Vo|*+dlXVvR73wzq+wDHitX@`k+ttJ%RLHo5^hX
zCvfc}{m9GKc3WFKd0q@IwkO~#+gG-wv3B2&SosU!kQR?K?A%}s`vmNl@EYZ@Cs2m*
zrpUHl`ci{!q-fj0!-F;YINVEav~7%^W!7(R?6+C*dVt3nJTkx|tA=>(ZQ3RLoW`KR
z!2e*MPj)^kz^|^nJHRhmx3%rE>ZG8p!FVB~YS!cAoOXWbKh>WldHkt?pJm{O`LgW{
zJ*QFdvv131z@ImGw?2a0&&;sCpTEAw#;>kk5XTGOk$p^cLv8x~0dg)t7t`2K{+nms
z33;szxmVnl6Q{p^eh2yzbY%P4(Q$>o^@44|+TBZM7QL75XR8A^J@46~;j-z`_iYd6
zalde9>O6YUH_2q(8O)mB;BIETJ*XTI`1jfK^F1iLE&7}FrD)UMqoI9g(zmR({AU?s
z9qN8!n$>fB-ZRhrBQ{r*2ePt1>D!;SbQs+8^jE)4&d7^Nbm)wG=FekrQT+VPdOwi&
z`tz3feME(wGX@Kr>x>#-5Z3wVn7sHc4Rh9##9b(Xm(K!hlzH-v^f_R|)bxjOFYBFz
zZQqI9OJ_lE6z2!ndpYjx`nC<E;1lS@-zow20Pb<G*!wo$xy?i0FWz@av0E`dm;2NA
zzUA_zIqyLB9@<>;m30R`lza&8-_SAt*MA%*L5Nmdh9_^F1pQ(wbuc?<0`_eeWM)ru
z$xRV>_b`o2E7Q!R^BZuB@$4-E{&3MTOzUEw@0mK<*MeyGcnF#!GIO}_wIn)PfV>x4
z&*6Ml3(;QgZ@IordGE@5SF{Y6b$=j@Sg%5E(7kX9xgVcQ?)d3mEOOav$(Uy;yxod8
z<UKFgn1-Eu4CY4MK+Rl*`{bKN)0l04Imf7L5HA{Z1_|lKCgE))@1N2^d}atKzSDCa
zuRe#HJzwlM#&B%G{oGs<6<w2a`gjm~;5eCCgxqhj_glo9IacDFbhejY_2(+|Mm*X%
z^CIM|k#Ee$f5eQLx3ce`{CtVxL+ryHcP*K<zt2B6$JuXQhInL;+&tl&Ya7JH!1u&{
zAfE0FJRNboed%}*%TPBwf_O=nBy5!4v~TQ%d&TVq<6wH;W|CbZc>=^#<cVjJu3mwB
zau_GFdF7hbBnJ++`Tyuz+)1-j<;Xnz65^M=LfRph%qw^7)ug|&3%QW@Y+Rn}4tSli
z|H)wt@q;H~Z@nCQ-u>l0s9s5;T}K;t<TJpr{#-De&v8B$j}OV-Rk5{NSj^!JIV(;(
z)^6i3x#SJ8Fm>I7<lqc)tSc7{iFVEt-Mfv<qpw1@94H9KwGmNyk`>)ZFRY8fam?D*
zYShby>~th?%&_2j$9Xgx{uG-lL2s`0m(U(NGmtrj_*8%g`4=l^5xI6CGeLg|c$x%;
zWBtf+1ZM!Ur|;Ml(5JZ`rt1g7ew=;Hd)wS}*W*{y<FQrV3;p1C1N!vrwWyEr@=Lr%
z*`rTUa@nNoLmr0pG1tIEX|we88yt&6+cEWVaPBWA*4resv)x3To9z$V6A7K>=LZAX
zfgRI0s{ww%xR>1$vsKR6sbT-2kRj9+>HF(!9+zvgtgOovpTcBptyU}_4qfJcI?TcH
zI^9cdIyCfiju*LA=TzkCvU1e{zgee%=dAP9jOEFAcyrMYe@R_6-`1DKXk~i+r(Gv<
zP8p3eu)x;p1u_$riDr@?J!3^+y90WeZqy1ogGV7BvXsnYKZVcvSJ<HZHcm~$8IPXd
z;TJTm{GcWyW{)825_vCo^yRkREtStvo@ae)uIY>FPt@lXW7kW?<B!*OYz$=tJeTkn
zxmG21F0*L#Vbk84>pi1+XDLb7{;0|JK%o{RDl>c+Xh{Cn)}VZ3RD4d<Y$9&iBx~Rr
zgc$wMKZRsG1DP}HcjuG~J@+Ztim={BcI-}^7cMw2`@UH<rfGW|ZPW9&#^|^8yPPY8
zE{Tq_-)z$$AMThdNgjV1{M_#($FQuyhqHRv*Ig24>bPN~7w{U=_kr)Q?&Y_7^{Z*P
zbICB)EPM6~*MM4EtKX+CwDqW*duF}E^;dlNkUfs^d}F4GYh82XqHSICQGLRBc|6zu
zo;L~hai^CP=Y0e}6l#`jU9o9JqE%byyCvy-$*~4~=cbs>h{{vmZ`?24abHa5DfFzW
zevD+N?%^8z{V4F}JE5(0%R#S|g=imqVs#B_&LF3G>Gkn4FHy&(ut!`2o|w;{o8tpq
z3)g>G>^_qh2kPFq);fBB%{jl1hu`c!aE;i=Plfd-KE07Q*B>ePXa;p)wgx*oN7^@^
znYC|t_PtA3yOdZ1*Y$919`gMMtGn$w8Q;1$;a9HqVptbnhkCY~?76b{(p+_eX|!jz
z@qXYOaeWZS#JJv0O}@*<a8mzYK{kB(cWzlP^QO$3vJ5n}47B7Nq^&0?%9b65a^%-S
rfc}<L`{2vtUcZh5^e=8vH4t9ChOOU#&(X5$XY2yX|CBj!a2)s_g>kU^

literal 0
HcmV?d00001

diff --git a/scancodeio/urls.py b/scancodeio/urls.py
index a8bd62ce9..5d2dc7409 100644
--- a/scancodeio/urls.py
+++ b/scancodeio/urls.py
@@ -52,6 +52,7 @@
     path("admin/", admin.site.urls),
     path("api/", include(api_router.urls)),
     path("license/", include(licenses.urls)),
+    path("scan/", include("scantext.urls")),
     path("", include("scanpipe.urls")),
     path("", RedirectView.as_view(url="project/")),
 ]
diff --git a/scanpipe/templates/scanpipe/base.html b/scanpipe/templates/scanpipe/base.html
index 711a79b80..0c5b9835c 100644
--- a/scanpipe/templates/scanpipe/base.html
+++ b/scanpipe/templates/scanpipe/base.html
@@ -5,6 +5,7 @@
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
     <title>{% block title %}ScanCode.io{% endblock %}</title>
+    <link rel="icon" href="{% static 'favicon.ico' %}" type="image/x-icon"/>
     <link rel="stylesheet" href="{% static 'bulma-0.9.2.min.css' %}" crossorigin="anonymous">
     <link rel="stylesheet" href="{% static 'highlight-10.6.0.css' %}" crossorigin="anonymous">
     <style>
diff --git a/scantext/__init__.py b/scantext/__init__.py
new file mode 100644
index 000000000..1f72cf031
--- /dev/null
+++ b/scantext/__init__.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
diff --git a/scantext/admin.py b/scantext/admin.py
new file mode 100644
index 000000000..9414bde9c
--- /dev/null
+++ b/scantext/admin.py
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django.contrib import admin
+
+# Register your models here.
diff --git a/scantext/apps.py b/scantext/apps.py
new file mode 100644
index 000000000..695cd08ca
--- /dev/null
+++ b/scantext/apps.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django.apps import AppConfig
+
+
+class ScantextConfig(AppConfig):
+    """Django application configuration for the ``scantext`` app."""
+
+    name = "scantext"
+    # Implicit primary keys on this app's models use a 64-bit auto-increment field.
+    default_auto_field = "django.db.models.BigAutoField"
diff --git a/scantext/forms.py b/scantext/forms.py
new file mode 100644
index 000000000..7681f52e9
--- /dev/null
+++ b/scantext/forms.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django import forms
+
+
+class EditorForm(forms.Form):
+    """Collect the license text to scan from a single required textarea."""
+
+    input_text = forms.CharField(
+        # "textarea has-fixed-size" are Bulma CSS classes used for the widget styling.
+        widget=forms.Textarea(
+            attrs={
+                "rows": 30,
+                "class": "textarea has-fixed-size",
+                "placeholder": "Paste your license text here.",
+            }
+        ),
+        required=True,
+    )
diff --git a/scantext/migrations/__init__.py b/scantext/migrations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scantext/models.py b/scantext/models.py
new file mode 100644
index 000000000..458130fff
--- /dev/null
+++ b/scantext/models.py
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django.db import models
+
+# Create your models here.
diff --git a/scantext/templates/scantext/includes/license_summary_level.html b/scantext/templates/scantext/includes/license_summary_level.html
new file mode 100644
index 000000000..23455f525
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_summary_level.html
@@ -0,0 +1,35 @@
+{% load humanize %}
+<nav class="level is-mobile">
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">Percentage Of License Text</p>
+      <p class="title">
+          <span>
+            {{ expr.percentage_of_license_text }}
+          </span>
+      </p>
+    </div>
+  </div>
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">License Expressions</p>
+      <p class="title">
+          <span>
+            {{ expr.license_expressions | length }}
+
+          </span>
+      </p>
+    </div>
+  </div>
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">Licenses</p>
+      <p class="title">
+          <span>
+            {{ expr.licenses | length }}
+
+          </span>
+      </p>
+    </div>
+  </div>
+</nav>
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
new file mode 100644
index 000000000..b04837927
--- /dev/null
+++ b/scantext/templates/scantext/license_detail.html
@@ -0,0 +1,107 @@
+{% extends 'scanpipe/base.html' %}
+{% load static humanize %}
+
+
+{% block extrahead %}
+  <link rel="stylesheet" href="{% static 'billboard-3.0.1-datalab.min.css' %}" crossorigin="anonymous" />
+{% endblock %}
+
+{% block content %}
+<div class="container is-widescreen">
+    {% include 'scanpipe/includes/navbar_header.html' %}
+    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
+
+    <section class="section pt-0">
+      <div class="is-flex is-justify-content-space-between mb-2">
+        <div class="mb-2">
+          <h1 class="title is-4">License Detection Summary</h1>
+        </div>
+        <div>
+          <a href="{% url 'license_scan' %}" class="button is-link">
+            Scan Again
+          </a>
+        </div>
+      </div>
+    </section>
+
+    <hr class="mx-5">
+    {% include "scantext/includes/license_summary_level.html" with expr=expr %}
+
+    <hr class="mx-5">
+
+      <div class="columns">
+        <div class="column is-half">
+          <p class="title is-4">Input License Text</p>
+           <div class="box">
+             {% for line in text %}
+              <p>{{line}}</p>
+             {% endfor %}
+           </div>
+        </div>
+        <div class="column is-half">
+          <p class="title is-4">Detected License Errors</p>
+
+          {% for license in expr.licenses %}
+
+          <div class="card">
+            <div class="card-header">
+              <div class="card-header-title">
+                  <a class="" href="{{license.homepage_url}}" title="{{license.short_name}}">{{license.name| center:"15"}}</a>
+                  <p class="tag is-6 mx-1 is-light 
+                  {% if license.score == 100 %}
+                  is-success
+                  {% else %}
+                  is-warning
+                  {% endif %}
+                  ">{{license.score}}</p>
+                  <p class="tag is-6 mx-1 is-light is-info">
+                  {% if license.start_line == license.end_line %}
+                    Line {{license.start_line}}
+                  {% else %}
+                    Lines {{license.start_line}} - {{license.end_line}}
+                  {% endif %}
+                  </p>
+                  <p class="tag is-6 mx-1 is-light is-primary">{{license.category}}</p>
+              </div>
+              <p class="card-header-icon" aria-label="more options">
+                <span class="icon">
+                  <i class="fas fa-angle-down" aria-hidden="true"></i>
+                </span>
+              </p>
+          </div>
+          {% if license.score < 100 %}
+          <div class="card-content">
+              <p class="subtitle is-5">Recommended License</p>
+              <p class="content">{{license}}</p>
+          </div>
+          {% endif %}
+        </div>
+        <br>
+          {% endfor %}
+      </div>
+    </div>
+</div>
+{% endblock %}
+
+{% block scripts %}
+<script type="text/javascript">
+const cards = document.querySelectorAll('.card')
+
+// const cardsicons = document.querySelectorAll('.card-header-icon')
+// cardsicons.forEach(icon => {
+//   icon.addEventListener('click', (e, index) => {
+//     e.preventDefault();
+//     removeActiveCards();
+//     console.log(e)
+//     console.log(index)
+//     icon.parentNode.parentNode.parentNode.querySelector('.card-content').style.display="visible"
+//   })
+// })
+
+// function removeActiveCards() {
+//     cards.forEach(card => {
+//         card.parentNode.parentNode.parentNode.querySelector('.card-content').style.display="none"
+//     })
+// }
+</script>
+{% endblock %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_scan.html b/scantext/templates/scantext/license_scan.html
new file mode 100644
index 000000000..996c51511
--- /dev/null
+++ b/scantext/templates/scantext/license_scan.html
@@ -0,0 +1,84 @@
+{% extends "scanpipe/base.html" %}
+{% load static humanize %}
+
+{% block content %}
+
+  <div class="container is-max-desktop">
+    {% include 'scanpipe/includes/navbar_header.html' %}
+    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
+
+    <section class="section pt-0">
+      <form method="post" action="{% url 'license_scan' %}" >
+        {% csrf_token %}
+      <div class="is-flex is-justify-content-space-between mb-2">
+        <div class="mb-2">
+          <h1 class="title is-5">Scan License</h1>
+        </div>
+        <div>
+          <a href="{% url 'project_add' %}" class="button is-info">New Project</a>
+          <input type="submit" class="button is-link" value="Scan License">
+        </div>
+      </div>
+        {{form.input_text}}
+      </form>
+    </section>
+  </div>
+{% endblock %}
+
+<!--       <div id="editor" style="min-height:730px; border: lightgrey 1px solid;"></div>
+ -->
+{% comment "Ace editor integration is disabled: template tags still run inside HTML comments." %}
+{% block scripts %}
+   <script src="{% static 'ace-1.4.12.min.js' %}" crossorigin="anonymous"></script>
+  <script>
+    let editor = ace.edit("editor", {
+      mode: "ace/mode/text",
+      autoScrollEditorIntoView: true,
+      wrap: true,
+      readOnly: false,
+      showPrintMargin: false,
+      highlightActiveLine: true,
+      highlightGutterLine: false,
+      fontSize: 15,
+      foldStyle: "manual",
+      fontFamily: "SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace",
+    });
+
+    let form = document.querySelector('form');
+
+    form.addEventListener('submit', (event) => {
+      
+      let background = displayOverlay();
+
+      // The upload progress is only added when input files are provided.
+      // if (!form["input_files"].files.length) return false;
+
+      event.preventDefault();
+
+      // let progress_bar = document.createElement('progress');
+      // progress_bar.className = 'progress is-success is-medium file-upload';
+      // progress_bar.setAttribute('value', '0');
+      // progress_bar.setAttribute('max', '100');
+
+      // let progress_container = document.createElement('div');
+      // progress_container.className = 'container is-max-desktop mt-6 px-6';
+      // progress_container.appendChild(progress_bar)
+      // background.appendChild(progress_container);
+
+      // let form_errors = document.getElementById('form-errors');
+      // displayFormUploadProgress(form, progress_bar, form_errors);
+
+      // The upload progress is only added when input files are provided.
+      // if (!form["input_files"].files.length) return false;
+
+      // event.preventDefault();
+      // console.log(form["input_files"].files);
+
+      // event.preventDefault();
+      hidden_input = document.querySelector('#id_input_text');
+      hidden_input.value = editor.getValue();
+      console.log(hidden_input.value)
+    });
+
+  </script>
+{% endblock %}{% endcomment %}
diff --git a/scantext/tests.py b/scantext/tests.py
new file mode 100644
index 000000000..33eeb08c8
--- /dev/null
+++ b/scantext/tests.py
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/scantext/urls.py b/scantext/urls.py
new file mode 100644
index 000000000..38b277a64
--- /dev/null
+++ b/scantext/urls.py
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from django.urls import path
+
+from scantext import views
+
+urlpatterns = [
+    # Single entry point of the app; templates reverse it as "license_scan".
+    path("", views.license_scanview, name="license_scan"),
+]
diff --git a/scantext/views.py b/scantext/views.py
new file mode 100644
index 000000000..cd670a316
--- /dev/null
+++ b/scantext/views.py
@@ -0,0 +1,312 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+import pprint
+import sys
+
+from django.http import HttpResponseRedirect
+from django.shortcuts import render
+from django.views import generic
+
+from licensedcode import cache
+
+from scancodeio.auth import ConditionalLoginRequired
+from scantext.forms import EditorForm
+
+SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
+DEJACODE_LICENSE_URL = "https://enterprise.dejacode.com/urn/urn:dje:license:{}"
+SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
+
+
+def license_scanview(request):
+    form = EditorForm()
+    if request.method == "POST":
+        form = EditorForm(request.POST)
+        if form.is_valid():
+            # idx = cache.get_index()
+            # matches = idx.match(query_string=text)
+            # print(type(matches))
+            # print(type(matches[0].rule))
+            text = form.cleaned_data["input_text"]
+            expressions = get_licenses("/home/human/Desktop/license-text.txt")
+            pp = pprint.PrettyPrinter(indent=4)
+            pp.pprint(expressions)
+            return render(
+                request,
+                "scantext/license_detail.html",
+                {
+                    "text": text.split("\r"),
+                    "expr": expressions,
+                },
+            )
+    return render(request, "scantext/license_scan.html", {"form": form})
+
+
+def get_licenses(
+    location,
+    min_score=0,
+    include_text=False,
+    license_text_diagnostics=False,
+    license_url_template=SCANCODE_LICENSEDB_URL,
+    unknown_licenses=False,
+    deadline=sys.maxsize,
+    **kwargs,
+):
+    """
+    Return a mapping of detected_licenses for licenses detected in the file at
+    `location`.
+    This mapping contains, among others, these two keys:
+     - 'licenses' with a value that is a list of mappings of license information.
+     - 'license_expressions' with a value that is a list of license expression
+       strings.
+    `min_score` is a minimum score threshold from 0 to 100. The default is 0,
+    meaning that all license matches are returned. If specified, matches with a
+    score lower than `min_score` are not returned.
+    If `include_text` is True, matched text is included in the returned
+    `licenses` data as well as a file-level `percentage_of_license_text` percentage to
+    indicate the overall proportion of detected license text and license notice
+    words in the file. This is used to determine if a file contains mostly
+    licensing information.
+    If ``unknown_licenses`` is True, also detect unknown licenses.
+    """
+    from licensedcode import cache
+    from licensedcode.spans import Span
+
+    idx = cache.get_index()
+
+    detected_licenses = []
+    detected_expressions = []
+
+    matches = idx.match(
+        location=location,
+        min_score=min_score,
+        deadline=deadline,
+        unknown_licenses=unknown_licenses,
+        **kwargs,
+    )
+
+    qspans = []
+    match = None
+    for match in matches:
+        qspans.append(match.qspan)
+
+        detected_expressions.append(match.rule.license_expression)
+
+        detected_licenses.extend(
+            _licenses_data_from_match(
+                match=match,
+                include_text=include_text,
+                license_text_diagnostics=license_text_diagnostics,
+                license_url_template=license_url_template,
+            )
+        )
+
+    percentage_of_license_text = 0
+    if match:
+        # we need at least one match to compute a license_coverage
+        matched_tokens_length = len(Span().union(*qspans))
+        query_tokens_length = match.query.tokens_length(with_unknown=True)
+        percentage_of_license_text = round(
+            (matched_tokens_length / query_tokens_length) * 100, 2
+        )
+
+    detected_spdx_expressions = []
+    return dict(
+        [
+            ("licenses", detected_licenses),
+            ("license_expressions", detected_expressions),
+            ("spdx_license_expressions", detected_spdx_expressions),
+            ("percentage_of_license_text", percentage_of_license_text),
+        ]
+    )
+
+
+def _licenses_data_from_match(
+    match,
+    include_text=False,
+    license_text_diagnostics=False,
+    license_url_template=SCANCODE_LICENSEDB_URL,
+):
+    """
+    Return a list of "licenses" scan data built from a license match.
+    Used directly only internally for testing.
+    """
+    from licensedcode import cache
+
+    licenses = cache.get_licenses_db()
+
+    matched_text = None
+    if include_text:
+        if license_text_diagnostics:
+            matched_text = match.matched_text(whole_lines=False, highlight=True)
+        else:
+            matched_text = match.matched_text(whole_lines=True, highlight=False)
+
+    SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
+    SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
+    SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
+
+    detected_licenses = []
+    for license_key in match.rule.license_keys():
+        lic = licenses.get(license_key)
+        result = {}
+        detected_licenses.append(result)
+        result["key"] = lic.key
+        result["score"] = match.score()
+        result["name"] = lic.name
+        result["short_name"] = lic.short_name
+        result["category"] = lic.category
+        result["is_exception"] = lic.is_exception
+        result["is_unknown"] = lic.is_unknown
+        result["owner"] = lic.owner
+        result["homepage_url"] = lic.homepage_url
+        result["text_url"] = lic.text_urls[0] if lic.text_urls else ""
+        result["reference_url"] = license_url_template.format(lic.key)
+        result["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
+        result["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
+
+        spdx_key = lic.spdx_license_key
+        result["spdx_license_key"] = spdx_key
+
+        if spdx_key:
+            is_license_ref = spdx_key.lower().startswith("licenseref-")
+            if is_license_ref:
+                spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
+            else:
+                spdx_key = lic.spdx_license_key.rstrip("+")
+                spdx_url = SPDX_LICENSE_URL.format(spdx_key)
+        else:
+            spdx_url = ""
+        result["spdx_url"] = spdx_url
+        result["start_line"] = match.start_line
+        result["end_line"] = match.end_line
+        matched_rule = result["matched_rule"] = {}
+        matched_rule["identifier"] = match.rule.identifier
+        matched_rule["license_expression"] = match.rule.license_expression
+        matched_rule["licenses"] = match.rule.license_keys()
+        matched_rule["referenced_filenames"] = match.rule.referenced_filenames
+        matched_rule["is_license_text"] = match.rule.is_license_text
+        matched_rule["is_license_notice"] = match.rule.is_license_notice
+        matched_rule["is_license_reference"] = match.rule.is_license_reference
+        matched_rule["is_license_tag"] = match.rule.is_license_tag
+        matched_rule["is_license_intro"] = match.rule.is_license_intro
+        matched_rule["has_unknown"] = match.rule.has_unknown
+        matched_rule["matcher"] = match.matcher
+        matched_rule["rule_length"] = match.rule.length
+        matched_rule["matched_length"] = match.len()
+        matched_rule["match_coverage"] = match.coverage()
+        matched_rule["rule_relevance"] = match.rule.relevance
+        # FIXME: for sanity this should always be included?????
+        if include_text:
+            result["matched_text"] = matched_text
+    return detected_licenses
+
+
+# class LicenseScanView(ConditionalLoginRequired, generic.FormView):
+#      template_name = "scantext/license_scan.html"
+#      form_class = EditorForm
+
+#      def form_valid(self, form):
+#          idx = cache.get_index()
+#          text = form.cleaned_data["input_text"]
+#          matches = idx.match(query_string=text)
+#          print(matches)
+#          return HttpResponseRedirect("/scan/history/detail/", {
+#             "matches": matches
+#             })
+# print(form.cleaned_data["input_text"])
+
+#     import magic
+#     def is_text(self, form):
+#         return magic.from_file(self.request.files) == 'text/plain'
+
+#     def form_invalid(self, form):
+#         print("No")
+
+# def LicenseScanView(request):
+#     if request.method == 'POST':
+#         form = EditorForm(request.POST, request.FILES)
+#         if form.is_valid():
+#             print(type(request.FILES['input_files']))
+#             return HttpResponseRedirect('/scan/history/')
+
+#     return render(request, "scantext/license_scan.html", {
+#         "form": EditorForm
+#         })
+
+# class LicenseListView(ConditionalLoginRequired, generic.TemplateView):
+#    template_name = "scantext/license_list.html"
+#
+#
+# class LicenseDetailView(ConditionalLoginRequired, generic.DetailView):
+#    model = License
+#    template_name = "scantext/license_detail.html"
+#
+#
+#    def get_context_data(self, **kwargs):
+#        context = super().get_context_data(**kwargs)
+#        context['now'] = context.objects.all()
+#        return context
+#
+#
+# class LicenseReportView(ConditionalLoginRequired, generic.DetailView):
+#    template_name = "scantext/license_report.html"
+#
+#
+#
+# import ast
+# import json
+# def template_vv(request):
+#
+#    with open("/home/human/dev/sco/output.json", "r") as f:
+#        data = f.read()
+#        # print(data)
+#        co = json.loads(ast.literal_eval(json.dumps(data)))
+#        # print(co)
+#        # print(type(co))
+#        return render(request, "scantext/license_list.html", {
+#        "co": co
+#        })
+#
+# def template_dt(request):
+#
+#    with open("/home/human/dev/sco/output.json", "r") as f:
+#        data = f.read()
+#        # print(data)
+#        co = json.loads(ast.literal_eval(json.dumps(data, sort_keys=True, indent=4)))
+#        # print(co)
+#        # print(type(co))
+#        return render(request, "scantext/license_detail.html", {
+#        "co": co
+#        })
+#
+#    # def get_context_data(self, **kwargs):
+#    #     context = super().get_context_data(**kwargs)
+#    #     context['now'] = json.dumps(co)
+#    #     return context
+#
+#
+#
+# class LicenseResultView(ConditionalLoginRequired, generic.DetailView):
+#    model = License
+#    template_name = "scantext/license_detail.html"
+#

From 9c25575fb3d1118f1586d2c378f6b564fdb9974a Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 20 Jun 2022 11:41:06 +0530
Subject: [PATCH 02/59] Improved UI, Changed Scan Path * Dropdown cards work
 well in the details page * Re-arranged html code * Changed Scan path from
 /scan/ to /scantext/

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scancodeio/settings.py                        |   2 +-
 scancodeio/urls.py                            |   2 +-
 scantext/forms.py                             |  38 +-
 .../includes/license_summary_level.html       |  52 +-
 .../templates/scantext/license_detail.html    | 158 ++--
 scantext/templates/scantext/license_scan.html | 112 +--
 scantext/tests/__init__.py                    |  21 +
 scantext/tests/data/LICENSES                  | 749 ++++++++++++++++++
 scantext/{tests.py => tests/test.py}          |   0
 scantext/views.py                             | 129 +--
 10 files changed, 938 insertions(+), 325 deletions(-)
 create mode 100644 scantext/tests/__init__.py
 create mode 100644 scantext/tests/data/LICENSES
 rename scantext/{tests.py => tests/test.py} (100%)

diff --git a/scancodeio/settings.py b/scancodeio/settings.py
index d4dea0f75..3c50f38fa 100644
--- a/scancodeio/settings.py
+++ b/scancodeio/settings.py
@@ -45,7 +45,7 @@
 ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=[".localhost", "127.0.0.1", "[::1]"])
 
 # SECURITY WARNING: don't run with debug turned on in production
-DEBUG = env.bool("SCANCODEIO_DEBUG", default=True)
+DEBUG = env.bool("SCANCODEIO_DEBUG", default=False)
 
 SCANCODEIO_REQUIRE_AUTHENTICATION = env.bool(
     "SCANCODEIO_REQUIRE_AUTHENTICATION", default=False
diff --git a/scancodeio/urls.py b/scancodeio/urls.py
index 5d2dc7409..08ab7f1eb 100644
--- a/scancodeio/urls.py
+++ b/scancodeio/urls.py
@@ -52,7 +52,7 @@
     path("admin/", admin.site.urls),
     path("api/", include(api_router.urls)),
     path("license/", include(licenses.urls)),
-    path("scan/", include("scantext.urls")),
+    path("scantext/", include("scantext.urls")),
     path("", include("scanpipe.urls")),
     path("", RedirectView.as_view(url="project/")),
 ]
diff --git a/scantext/forms.py b/scantext/forms.py
index 7681f52e9..88ac7b29e 100644
--- a/scantext/forms.py
+++ b/scantext/forms.py
@@ -27,38 +27,16 @@ class EditorForm(forms.Form):
     input_text = forms.CharField(
         widget=forms.Textarea(
             attrs={
-                "rows": 30,
+                "rows": 25,
                 "class": "textarea has-fixed-size",
                 "placeholder": "Paste your license text here.",
             }
         ),
-        required=True,
+        required=False,
+    )
+    input_file = forms.FileField(
+        required=False,
+        widget=forms.ClearableFileInput(
+            attrs={"class": "file-input", "multiple": False},
+        ),
     )
-
-    # def clean_input_text(self):
-    #     input_text = self.cleaned_data.get("input_text")
-    #     return " ".join(input_text.split())
-
-    # def save(self, *args, **kwargs):
-    #     license = super().save(*args, **kwargs)
-    #     self.handle_input(license)
-    #     return license
-
-    # class Media:
-    #     js = ("add-inputs.js",)
-
-    # def handle_inputs(self, project):
-    #     input_file = self.files.getlist("input_files")
-    #     input_text = self.cleaned_data.get("input_text")
-    #     print(input_text)
-    #     if input_file:
-    #         license.add_uploads(input_file)
-    #     elif input_text:
-    #         license.add_license(input_text)
-
-    # input_files = forms.FileField(
-    #     required=False,
-    #     widget=forms.ClearableFileInput(
-    #         attrs={"class": "file-input", "multiple": False},
-    #     ),
-    # )
diff --git a/scantext/templates/scantext/includes/license_summary_level.html b/scantext/templates/scantext/includes/license_summary_level.html
index 23455f525..284f30944 100644
--- a/scantext/templates/scantext/includes/license_summary_level.html
+++ b/scantext/templates/scantext/includes/license_summary_level.html
@@ -1,35 +1,27 @@
 {% load humanize %}
 <nav class="level is-mobile">
-  <div class="level-item has-text-centered">
-    <div>
-      <p class="heading">Percentage Of License Text</p>
-      <p class="title">
-          <span>
-            {{ expr.percentage_of_license_text }}
-          </span>
-      </p>
+    <div class="level-item has-text-centered">
+        <div>
+            <p class="heading">Percentage Of License Text</p>
+            <p class="title">
+              <span>{{ expr.percentage_of_license_text }}</span>
+            </p>
+        </div>
     </div>
-  </div>
-  <div class="level-item has-text-centered">
-    <div>
-      <p class="heading">License Expressions</p>
-      <p class="title">
-          <span>
-            {{ expr.license_expressions | length }}
-
-          </span>
-      </p>
+    <div class="level-item has-text-centered">
+        <div>
+            <p class="heading">License Expressions</p>
+            <p class="title">
+              <span>{{ expr.license_expressions|length }}</span>
+            </p>
+        </div>
     </div>
-  </div>
-  <div class="level-item has-text-centered">
-    <div>
-      <p class="heading">Licenses</p>
-      <p class="title">
-          <span>
-            {{ expr.licenses | length }}
-
-          </span>
-      </p>
+    <div class="level-item has-text-centered">
+        <div>
+            <p class="heading">Licenses</p>
+            <p class="title">
+              <span>{{ expr.licenses|length }}</span>
+            </p>
+        </div>
     </div>
-  </div>
-</nav>
+</nav>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index b04837927..dedfc259f 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -1,107 +1,99 @@
-{% extends 'scanpipe/base.html' %}
-{% load static humanize %}
-
-
-{% block extrahead %}
-  <link rel="stylesheet" href="{% static 'billboard-3.0.1-datalab.min.css' %}" crossorigin="anonymous" />
-{% endblock %}
+{% extends 'scanpipe/base.html' %} 
+{% load static humanize %} 
 
 {% block content %}
 <div class="container is-widescreen">
     {% include 'scanpipe/includes/navbar_header.html' %}
     <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
-    <section class="section pt-0">
-      <div class="is-flex is-justify-content-space-between mb-2">
-        <div class="mb-2">
-          <h1 class="title is-4">License Detection Summary</h1>
-        </div>
-        <div>
-          <a href="{% url 'license_scan' %}" class="button is-link">
-            Scan Again
-          </a>
+    <section class="section p-0">
+        <div class="is-flex is-justify-content-space-between is-align-items-center mb-2">
+            <div class="mb-2">
+                <h1 class="title is-4">License Detection Summary</h1>
+            </div>
+            <div>
+                <a href="{% url 'license_scan' %}" class="button is-link">Scan Again</a>
+            </div>
         </div>
-      </div>
     </section>
 
     <hr class="mx-5">
     {% include "scantext/includes/license_summary_level.html" with expr=expr %}
-
     <hr class="mx-5">
 
-      <div class="columns">
+    <div class="columns mb-5">
         <div class="column is-half">
-          <p class="title is-4">Input License Text</p>
-           <div class="box">
-             {% for line in text %}
-              <p>{{line}}</p>
-             {% endfor %}
-           </div>
+            <p class="title is-4">Input License Text</p>
+            <pre>{{ text }}</pre>
         </div>
         <div class="column is-half">
-          <p class="title is-4">Detected License Errors</p>
-
-          {% for license in expr.licenses %}
-
-          <div class="card">
-            <div class="card-header">
-              <div class="card-header-title">
-                  <a class="" href="{{license.homepage_url}}" title="{{license.short_name}}">{{license.name| center:"15"}}</a>
-                  <p class="tag is-6 mx-1 is-light 
-                  {% if license.score == 100 %}
-                  is-success
-                  {% else %}
-                  is-warning
-                  {% endif %}
-                  ">{{license.score}}</p>
-                  <p class="tag is-6 mx-1 is-light is-info">
-                  {% if license.start_line == license.end_line %}
-                    Line {{license.start_line}}
-                  {% else %}
-                    Lines {{license.start_line}} - {{license.end_line}}
-                  {% endif %}
-                  </p>
-                  <p class="tag is-6 mx-1 is-light is-primary">{{license.category}}</p>
-              </div>
-              <p class="card-header-icon" aria-label="more options">
-                <span class="icon">
-                  <i class="fas fa-angle-down" aria-hidden="true"></i>
-                </span>
-              </p>
-          </div>
-          {% if license.score < 100 %}
-          <div class="card-content">
-              <p class="subtitle is-5">Recommended License</p>
-              <p class="content">{{license}}</p>
-          </div>
-          {% endif %}
+            <p class="title is-4">Detected Licenses</p>
+            <div class="card">
+                {% for license in expr.licenses %}
+                <div class="licenses-card">
+                    <div class="card-header is-flex is-justify-content-space-between">
+                        <div class="card-header-title" title="{{ license.short_name }}">
+                          {% if license.homepage_url %}
+                            <a href="{{ license.homepage_url }}">{{ license.name }}</a> 
+                          {% else %} {{ license.name }} {% endif %}
+                        </div>
+                        <div class="is-flex is-justify-content-row is-align-items-center">
+                            <p class="lines tag is-6 mx-1 is-light is-info">
+                              {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                            </p>
+                            <p class="tag is-6 mx-1 is-light 
+                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                            <p class="card-header-icon" aria-label="more options">
+                              <span class="icon">
+                                <i class="fas fa-angle-down" aria-hidden="true"></i>
+                              </span>
+                            </p>
+                        </div>
+                    </div>
+                    <div class="card-content is-hidden">
+                        <div class="content">
+                            <div class="is-flex is-justify-content-space-between">
+                                <div>
+                                    <p class="subtitle is-5">Matched Text</p>
+                                </div>
+                                <div>
+                                    <p class="tag is-6 mx-1 is-light is-primary">{{ license.category }}</p>
+                                    <a class="tag is-6 mx-1 is-light is-link" href="{{ license.reference_url }}">ref</a>
+                                    <p class="tag is-6 mx-1 is-light is-info">
+                                        {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                                    </p>
+                                </div>
+                            </div>
+                            <pre>{{ license.matched_text }}</pre>
+                        </div>
+                    </div>
+                </div>
+                {% endfor %}
+            </div>
         </div>
-        <br>
-          {% endfor %}
-      </div>
     </div>
+
 </div>
-{% endblock %}
+{% endblock %} 
 
 {% block scripts %}
 <script type="text/javascript">
-const cards = document.querySelectorAll('.card')
-
-// const cardsicons = document.querySelectorAll('.card-header-icon')
-// cardsicons.forEach(icon => {
-//   icon.addEventListener('click', (e, index) => {
-//     e.preventDefault();
-//     removeActiveCards();
-//     console.log(e)
-//     console.log(index)
-//     icon.parentNode.parentNode.parentNode.querySelector('.card-content').style.display="visible"
-//   })
-// })
-
-// function removeActiveCards() {
-//     cards.forEach(card => {
-//         card.parentNode.parentNode.parentNode.querySelector('.card-content').style.display="none"
-//     })
-// }
+    const cards = document.querySelectorAll('.card-header-icon')
+    
+    cards.forEach(card => {
+      card.addEventListener('click', (event) => {
+        event.preventDefault()
+        content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
+        lineTag= card.parentElement.querySelector('.lines').classList
+        if (content.contains('is-hidden')) {
+          content.remove('is-hidden')
+          lineTag.add('is-hidden')
+        } else {
+          content.add('is-hidden')
+          lineTag.remove('is-hidden')
+        }
+      })
+    })
+    
 </script>
 {% endblock %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_scan.html b/scantext/templates/scantext/license_scan.html
index 996c51511..cb7d1a2ce 100644
--- a/scantext/templates/scantext/license_scan.html
+++ b/scantext/templates/scantext/license_scan.html
@@ -1,84 +1,50 @@
-{% extends "scanpipe/base.html" %}
-{% load static humanize %}
+{% extends "scanpipe/base.html" %} 
+{% load static humanize %} 
 
 {% block content %}
-
-  <div class="container is-max-desktop">
+<div class="container is-max-desktop">
     {% include 'scanpipe/includes/navbar_header.html' %}
     <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
     <section class="section pt-0">
-      <form method="post" action="{% url 'license_scan' %}" >
-        {% csrf_token %}
-      <div class="is-flex is-justify-content-space-between mb-2">
-        <div class="mb-2">
-          <h1 class="title is-5">Scan License</h1>
-        </div>
-        <div>
-          <a href="{% url 'project_add' %}" class="button is-info">New Project</a>
-          <input type="submit" class="button is-link" value="Scan License">
+        <div class="is-flex is-justify-content-space-between is-align-items-center mb-2">
+            <div class="mb-2">
+                <h1 class="title is-5">Scan License</h1>
+            </div>
+            <div>
+                <a href="{% url 'project_list' %}" class="button is-link">Go to Projects</a>
+            </div>
         </div>
-      </div>
-        {{form.input_text}}
-      </form>
+        <form method="post" action="{% url 'license_scan' %}">
+            {% csrf_token %}
+            {{ form.input_text }}
+            <div class="columns mt-1">
+                <div class="file has-name is-fullwidth column is-half">
+                  <label class="file-label">
+                    {{ form.input_file }}
+                    <span class="file-cta">
+                      <span class="file-icon">
+                        <i class="fas fa-upload"></i>
+                      </span>
+                      <span class="file-label">Choose a file to scan…</span>
+                    </span>
+                    <span class="file-name">/home/user/dev/license-text.txt</span>
+                  </label>
+                </div>
+                <div class="column is-half">
+                  <input type="submit" class="button is-fullwidth is-link" value="Scan License">
+                </div>
+            </div>
+        </form>
     </section>
-  </div>
-{% endblock %}
-
-<!--       <div id="editor" style="min-height:730px; border: lightgrey 1px solid;"></div>
- -->
-<!-- 
-{% block scripts %}
-   <script src="{% static 'ace-1.4.12.min.js' %}" crossorigin="anonymous"></script>
-  <script>
-    let editor = ace.edit("editor", {
-      mode: "ace/mode/text",
-      autoScrollEditorIntoView: true,
-      wrap: true,
-      readOnly: false,
-      showPrintMargin: false,
-      highlightActiveLine: true,
-      highlightGutterLine: false,
-      fontSize: 15,
-      foldStyle: "manual",
-      fontFamily: "SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace",
-    });
-
+</div>
+{% endblock %} {% block scripts %}
+<script>
     let form = document.querySelector('form');
 
-    form.addEventListener('submit', (event) => {
-      
-      let background = displayOverlay();
-
-      // The upload progress is only added when input files are provided.
-      // if (!form["input_files"].files.length) return false;
-
-      event.preventDefault();
-
-      // let progress_bar = document.createElement('progress');
-      // progress_bar.className = 'progress is-success is-medium file-upload';
-      // progress_bar.setAttribute('value', '0');
-      // progress_bar.setAttribute('max', '100');
-
-      // let progress_container = document.createElement('div');
-      // progress_container.className = 'container is-max-desktop mt-6 px-6';
-      // progress_container.appendChild(progress_bar)
-      // background.appendChild(progress_container);
-
-      // let form_errors = document.getElementById('form-errors');
-      // displayFormUploadProgress(form, progress_bar, form_errors);
-
-      // The upload progress is only added when input files are provided.
-      // if (!form["input_files"].files.length) return false;
-
-      // event.preventDefault();
-      // console.log(form["input_files"].files);
-
-      // event.preventDefault();
-      hidden_input = document.querySelector('#id_input_text');
-      hidden_input.value = editor.getValue();
-      console.log(hidden_input.value)
-    });
+    // form.addEventListener('submit', (event) => {
 
-  </script>
-{% endblock %} -->
+    //   if (!form["id_input_file"].files.length && !form["id_input_text"].value.length) return false;
+    // });
+</script>
+{% endblock %} -->
\ No newline at end of file
diff --git a/scantext/tests/__init__.py b/scantext/tests/__init__.py
new file mode 100644
index 000000000..1f72cf031
--- /dev/null
+++ b/scantext/tests/__init__.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
diff --git a/scantext/tests/data/LICENSES b/scantext/tests/data/LICENSES
new file mode 100644
index 000000000..1866e7ba7
--- /dev/null
+++ b/scantext/tests/data/LICENSES
@@ -0,0 +1,749 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list
+of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use, not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating system.
+
+  Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in these notices.
+
+  Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.  Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception, the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies the executable.
+
+  It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the Sections above.
+
+    b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies, or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot impose that choice.
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 
+-------------------------------------------- 
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation 
+("PSF"), and the Individual or Organization ("Licensee") accessing and 
+otherwise using this software ("Python") in source or binary form and 
+its associated documentation. 
+
+2. Subject to the terms and conditions of this License Agreement, PSF 
+hereby grants Licensee a nonexclusive, royalty-free, world-wide 
+license to reproduce, analyze, test, perform and/or display publicly, 
+prepare derivative works, distribute, and otherwise use Python 
+alone or in any derivative version, provided, however, that PSF's 
+License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 
+2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights 
+Reserved" are retained in Python alone or in any derivative version 
+prepared by Licensee. 
+
+3. In the event Licensee prepares a derivative work that is based on 
+or incorporates Python or any part thereof, and wants to make 
+the derivative work available to others as provided herein, then 
+Licensee hereby agrees to include in any such work a brief summary of 
+the changes made to Python. 
+
+4. PSF is making Python available to Licensee on an "AS IS" 
+basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT 
+INFRINGE ANY THIRD PARTY RIGHTS. 
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 
+FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, 
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 
+
+6. This License Agreement will automatically terminate upon a material 
+breach of its terms and conditions. 
+
+7. Nothing in this License Agreement shall be deemed to create any 
+relationship of agency, partnership, or joint venture between PSF and 
+Licensee. This License Agreement does not grant permission to use PSF 
+trademarks or trade name in a trademark sense to endorse or promote 
+products or services of Licensee, or any third party. 
+
+8. By copying, installing or otherwise using Python, Licensee 
+agrees to be bound by the terms and conditions of this License 
+Agreement. 
+
+BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 
+------------------------------------------- 
+
+BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 
+
+1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 
+office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 
+Individual or Organization ("Licensee") accessing and otherwise using 
+this software in source or binary form and its associated 
+documentation ("the Software"). 
+
+2. Subject to the terms and conditions of this BeOpen Python License 
+Agreement, BeOpen hereby grants Licensee a non-exclusive, 
+royalty-free, world-wide license to reproduce, analyze, test, perform 
+and/or display publicly, prepare derivative works, distribute, and 
+otherwise use the Software alone or in any derivative version, 
+provided, however, that the BeOpen Python License is retained in the 
+Software, alone or in any derivative version prepared by Licensee. 
+
+3. BeOpen is making the Software available to Licensee on an "AS IS" 
+basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT 
+INFRINGE ANY THIRD PARTY RIGHTS. 
+
+4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 
+SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS 
+AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY 
+DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 
+
+5. This License Agreement will automatically terminate upon a material 
+breach of its terms and conditions. 
+
+6. This License Agreement shall be governed by and interpreted in all 
+respects by the law of the State of California, excluding conflict of 
+law provisions. Nothing in this License Agreement shall be deemed to 
+create any relationship of agency, partnership, or joint venture 
+between BeOpen and Licensee. This License Agreement does not grant 
+permission to use BeOpen trademarks or trade names in a trademark 
+sense to endorse or promote products or services of Licensee, or any 
+third party. As an exception, the "BeOpen Python" logos available at 
+http://www.pythonlabs.com/logos.html may be used according to the 
+permissions granted on that web page. 
+
+7. By copying, installing or otherwise using the software, Licensee 
+agrees to be bound by the terms and conditions of this License 
+Agreement. 
+
+CNRI OPEN SOURCE LICENSE AGREEMENT (for Python 1.6b1) 
+-------------------------------------------------- 
+
+IMPORTANT: PLEASE READ THE FOLLOWING AGREEMENT CAREFULLY. 
+
+BY CLICKING ON "ACCEPT" WHERE INDICATED BELOW, OR BY COPYING, 
+INSTALLING OR OTHERWISE USING PYTHON 1.6, beta 1 SOFTWARE, YOU ARE 
+DEEMED TO HAVE AGREED TO THE TERMS AND CONDITIONS OF THIS LICENSE 
+AGREEMENT. 
+
+1. This LICENSE AGREEMENT is between the Corporation for National 
+Research Initiatives, having an office at 1895 Preston White Drive, 
+Reston, VA 20191 ("CNRI"), and the Individual or Organization 
+("Licensee") accessing and otherwise using Python 1.6, beta 1 
+software in source or binary form and its associated documentation, 
+as released at the www.python.org Internet site on August 4, 2000 
+("Python 1.6b1"). 
+
+2. Subject to the terms and conditions of this License Agreement, CNRI 
+hereby grants Licensee a non-exclusive, royalty-free, world-wide 
+license to reproduce, analyze, test, perform and/or display 
+publicly, prepare derivative works, distribute, and otherwise use 
+Python 1.6b1 alone or in any derivative version, provided, however, 
+that CNRIs License Agreement is retained in Python 1.6b1, alone or 
+in any derivative version prepared by Licensee. 
+
+Alternately, in lieu of CNRIs License Agreement, Licensee may 
+substitute the following text (omitting the quotes): "Python 1.6, 
+beta 1, is made available subject to the terms and conditions in 
+CNRIs License Agreement. This Agreement may be located on the 
+Internet using the following unique, persistent identifier (known 
+as a handle): 1895.22/1011. This Agreement may also be obtained 
+from a proxy server on the Internet using the 
+URL:http://hdl.handle.net/1895.22/1011". 
+
+3. In the event Licensee prepares a derivative work that is based on 
+or incorporates Python 1.6b1 or any part thereof, and wants to make 
+the derivative work available to the public as provided herein, 
+then Licensee hereby agrees to indicate in any such work the nature 
+of the modifications made to Python 1.6b1. 
+
+4. CNRI is making Python 1.6b1 available to Licensee on an "AS IS" 
+basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR 
+FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6b1 
+WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 
+
+5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 
+SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR 
+LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING PYTHON 1.6b1, 
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY 
+THEREOF. 
+
+6. This License Agreement will automatically terminate upon a material 
+breach of its terms and conditions. 
+
+7. This License Agreement shall be governed by and interpreted in all 
+respects by the law of the State of Virginia, excluding conflict of 
+law provisions. Nothing in this License Agreement shall be deemed 
+to create any relationship of agency, partnership, or joint venture 
+between CNRI and Licensee. This License Agreement does not grant 
+permission to use CNRI trademarks or trade name in a trademark 
+sense to endorse or promote products or services of Licensee, or 
+any third party. 
+
+8. By clicking on the "ACCEPT" button where indicated, or by copying, 
+installing or otherwise using Python 1.6b1, Licensee agrees to be 
+bound by the terms and conditions of this License Agreement. 
+
+ACCEPT 
+
+CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 
+-------------------------------------------------- 
+
+Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 
+The Netherlands. All rights reserved. 
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that 
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of Stichting Mathematisch 
+Centrum or CWI not be used in advertising or publicity pertaining to 
+distribution of the software without specific, written prior 
+permission. 
+
+STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO 
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 
+FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE 
+FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
\ No newline at end of file
diff --git a/scantext/tests.py b/scantext/tests/test.py
similarity index 100%
rename from scantext/tests.py
rename to scantext/tests/test.py
diff --git a/scantext/views.py b/scantext/views.py
index cd670a316..95e05f521 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -22,16 +22,17 @@
 
 import pprint
 import sys
+import tempfile
 
-from django.http import HttpResponseRedirect
+from django.conf import settings
 from django.shortcuts import render
 from django.views import generic
 
-from licensedcode import cache
-
-from scancodeio.auth import ConditionalLoginRequired
 from scantext.forms import EditorForm
 
+SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
+SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
+SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
 SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
 DEJACODE_LICENSE_URL = "https://enterprise.dejacode.com/urn/urn:dje:license:{}"
 SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
@@ -42,19 +43,28 @@ def license_scanview(request):
     if request.method == "POST":
         form = EditorForm(request.POST)
         if form.is_valid():
-            # idx = cache.get_index()
-            # matches = idx.match(query_string=text)
-            # print(type(matches))
-            # print(type(matches[0].rule))
             text = form.cleaned_data["input_text"]
-            expressions = get_licenses("/home/human/Desktop/license-text.txt")
-            pp = pprint.PrettyPrinter(indent=4)
-            pp.pprint(expressions)
+            # license_location = tempfile.NamedTemporaryFile(mode="w", prefix="license_scan_", dir=settings.SCANCODEIO_WORKSPACE_LOCATION)
+            # with license_location as f:
+            #     f.write(text)
+            #     f.flush()
+            #     x=get_licenses(location=f)
+            #     f.close()
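+            # NOTE: the commented-out attempt above fails because get_licenses()
+            # receives the temporary file object rather than a path, raising:
+            # "expected str, bytes or os.PathLike object, not _TemporaryFileWrapper"
+            # (passing f.name may fix it - TODO confirm); scan a sample file for now.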
+
+            expressions = get_licenses(
+                location="scantext/tests/data/LICENSES",
+                include_text=True,
+                license_text_diagnostics=True,
+            )
             return render(
                 request,
                 "scantext/license_detail.html",
                 {
-                    "text": text.split("\r"),
+                    "text": text,
                     "expr": expressions,
                 },
             )
@@ -161,10 +171,6 @@ def _licenses_data_from_match(
         else:
             matched_text = match.matched_text(whole_lines=True, highlight=False)
 
-    SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
-    SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
-    SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
-
     detected_licenses = []
     for license_key in match.rule.license_keys():
         lic = licenses.get(license_key)
@@ -219,94 +225,3 @@ def _licenses_data_from_match(
         if include_text:
             result["matched_text"] = matched_text
     return detected_licenses
-
-
-# class LicenseScanView(ConditionalLoginRequired, generic.FormView):
-#      template_name = "scantext/license_scan.html"
-#      form_class = EditorForm
-
-#      def form_valid(self, form):
-#          idx = cache.get_index()
-#          text = form.cleaned_data["input_text"]
-#          matches = idx.match(query_string=text)
-#          print(matches)
-#          return HttpResponseRedirect("/scan/history/detail/", {
-#             "matches": matches
-#             })
-# print(form.cleaned_data["input_text"])
-
-#     import magic
-#     def is_text(self, form):
-#         return magic.from_file(self.request.files) == 'text/plain'
-
-#     def form_invalid(self, form):
-#         print("No")
-
-# def LicenseScanView(request):
-#     if request.method == 'POST':
-#         form = EditorForm(request.POST, request.FILES)
-#         if form.is_valid():
-#             print(type(request.FILES['input_files']))
-#             return HttpResponseRedirect('/scan/history/')
-
-#     return render(request, "scantext/license_scan.html", {
-#         "form": EditorForm
-#         })
-
-# class LicenseListView(ConditionalLoginRequired, generic.TemplateView):
-#    template_name = "scantext/license_list.html"
-#
-#
-# class LicenseDetailView(ConditionalLoginRequired, generic.DetailView):
-#    model = License
-#    template_name = "scantext/license_detail.html"
-#
-#
-#    def get_context_data(self, **kwargs):
-#        context = super().get_context_data(**kwargs)
-#        context['now'] = context.objects.all()
-#        return context
-#
-#
-# class LicenseReportView(ConditionalLoginRequired, generic.DetailView):
-#    template_name = "scantext/license_report.html"
-#
-#
-#
-# import ast
-# import json
-# def template_vv(request):
-#
-#    with open("/home/human/dev/sco/output.json", "r") as f:
-#        data = f.read()
-#        # print(data)
-#        co = json.loads(ast.literal_eval(json.dumps(data)))
-#        # print(co)
-#        # print(type(co))
-#        return render(request, "scantext/license_list.html", {
-#        "co": co
-#        })
-#
-# def template_dt(request):
-#
-#    with open("/home/human/dev/sco/output.json", "r") as f:
-#        data = f.read()
-#        # print(data)
-#        co = json.loads(ast.literal_eval(json.dumps(data, sort_keys=True, indent=4)))
-#        # print(co)
-#        # print(type(co))
-#        return render(request, "scantext/license_detail.html", {
-#        "co": co
-#        })
-#
-#    # def get_context_data(self, **kwargs):
-#    #     context = super().get_context_data(**kwargs)
-#    #     context['now'] = json.dumps(co)
-#    #     return context
-#
-#
-#
-# class LicenseResultView(ConditionalLoginRequired, generic.DetailView):
-#    model = License
-#    template_name = "scantext/license_detail.html"
-#

From 49fe7a513bb2b523dc00dfcb1d6c8823e5691d65 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 20 Jun 2022 12:00:00 +0530
Subject: [PATCH 03/59] changed card title from name to short_name

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/admin.py                             | 25 -------------------
 .../templates/scantext/license_detail.html    |  4 +--
 2 files changed, 2 insertions(+), 27 deletions(-)
 delete mode 100644 scantext/admin.py

diff --git a/scantext/admin.py b/scantext/admin.py
deleted file mode 100644
index 9414bde9c..000000000
--- a/scantext/admin.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-#
-# http://nexb.com and https://github.com/nexB/scancode.io
-# The ScanCode.io software is licensed under the Apache License version 2.0.
-# Data generated with ScanCode.io is provided as-is without warranties.
-# ScanCode is a trademark of nexB Inc.
-#
-# You may not use this software except in compliance with the License.
-# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software distributed
-# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
-# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
-# ScanCode.io should be considered or used as legal advice. Consult an Attorney
-# for any legal advice.
-#
-# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
-# Visit https://github.com/nexB/scancode.io for support and download.
-
-from django.contrib import admin
-
-# Register your models here.
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index dedfc259f..62bde5d95 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -34,8 +34,8 @@ <h1 class="title is-4">License Detection Summary</h1>
                     <div class="card-header is-flex is-justify-content-space-between">
                         <div class="card-header-title" title="{{ license.short_name }}">
                           {% if license.homepage_url %}
-                            <a href="{{ license.homepage_url }}">{{ license.name }}</a> 
-                          {% else %} {{ license.name }} {% endif %}
+                            <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> 
+                          {% else %} {{ license.short_name }} {% endif %}
                         </div>
                         <div class="is-flex is-justify-content-row is-align-items-center">
                             <p class="lines tag is-6 mx-1 is-light is-info">

From cb9de2df9a3c1b70fdf8a0fef903fbd3a11eda3e Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 20 Jun 2022 23:59:19 +0530
Subject: [PATCH 04/59] Improved License Input and Details UI #450

* Set the input textarea height to 15 rows (was 25)
* Added file-upload into form
* Changed <pre> to <textarea> in the details page
* Modified Navigation Bar and Renamed Scan Again Button

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scanpipe/templates/scanpipe/base.html         |  2 +-
 .../scanpipe/includes/navbar_header.html      |  3 +
 scantext/forms.py                             |  4 +-
 .../includes/license_detail_card.html         | 39 ++++++++++
 .../includes/license_summary_level.html       |  8 +--
 .../templates/scantext/license_detail.html    | 72 ++++---------------
 ...cense_scan.html => license_scan_form.html} | 41 +++++++----
 7 files changed, 90 insertions(+), 79 deletions(-)
 create mode 100644 scantext/templates/scantext/includes/license_detail_card.html
 rename scantext/templates/scantext/{license_scan.html => license_scan_form.html} (58%)

diff --git a/scanpipe/templates/scanpipe/base.html b/scanpipe/templates/scanpipe/base.html
index 5309a73d1..fbba32644 100644
--- a/scanpipe/templates/scanpipe/base.html
+++ b/scanpipe/templates/scanpipe/base.html
@@ -57,4 +57,4 @@
     <script src="{% static 'htmx-1.7.0.min.js' %}" crossorigin="anonymous" defer></script>
     {% block scripts %}{% endblock %}
   </body>
-</html>
+</html>
\ No newline at end of file
diff --git a/scanpipe/templates/scanpipe/includes/navbar_header.html b/scanpipe/templates/scanpipe/includes/navbar_header.html
index af72a2dcb..4e637f1fd 100644
--- a/scanpipe/templates/scanpipe/includes/navbar_header.html
+++ b/scanpipe/templates/scanpipe/includes/navbar_header.html
@@ -9,6 +9,9 @@
       <a class="navbar-item" href="{% url 'project_list' %}">
         Projects
       </a>
+      <a class="navbar-item" href="{% url 'license_scan' %}">
+        Scan
+      </a>
       <a class="navbar-item" href="https://scancodeio.readthedocs.org/" target="_blank">
         Documentation
       </a>
diff --git a/scantext/forms.py b/scantext/forms.py
index 88ac7b29e..0f21c4dcb 100644
--- a/scantext/forms.py
+++ b/scantext/forms.py
@@ -23,11 +23,11 @@
 from django import forms
 
 
-class EditorForm(forms.Form):
+class LicenseForm(forms.Form):
     input_text = forms.CharField(
         widget=forms.Textarea(
             attrs={
-                "rows": 25,
+                "rows": 15,
                 "class": "textarea has-fixed-size",
                 "placeholder": "Paste your license text here.",
             }
diff --git a/scantext/templates/scantext/includes/license_detail_card.html b/scantext/templates/scantext/includes/license_detail_card.html
new file mode 100644
index 000000000..451ce8d5f
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_detail_card.html
@@ -0,0 +1,39 @@
+{% for license in result.licenses %}
+<div class="licenses-card">
+    <div class="card-header is-flex is-justify-content-space-between">
+        <div class="card-header-title" title="{{ license.short_name }}">
+            {% if license.homepage_url %}
+            <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
+        </div>
+        <div class="is-flex is-justify-content-row is-align-items-center">
+            <p class="lines tag is-6 mx-1 is-light is-info">
+                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+            </p>
+            <p class="tag is-6 mx-1 is-light 
+                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+            <p class="card-header-icon" aria-label="more options">
+                <span class="icon">
+                    <i class="fas fa-angle-down" aria-hidden="true"></i>
+                </span>
+            </p>
+        </div>
+    </div>
+    <div class="card-content is-hidden">
+        <div class="content">
+            <div class="is-flex is-justify-content-space-between">
+                <div>
+                    <p class="subtitle is-5">Matched Text</p>
+                </div>
+                <div>
+                    <p class="tag is-6 mx-1 is-light is-primary">{{ license.category }}</p>
+                    <a class="tag is-6 mx-1 is-light is-link" href="{{ license.reference_url }}">ref</a>
+                    <p class="tag is-6 mx-1 is-light is-info">
+                        {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                    </p>
+                </div>
+            </div>
+            <textarea class="textarea has-fixed-size" rows="15" style="cursor: text;" disabled>{{ license.matched_text }}</textarea>
+        </div>
+    </div>
+</div>
+{% endfor %}
diff --git a/scantext/templates/scantext/includes/license_summary_level.html b/scantext/templates/scantext/includes/license_summary_level.html
index 284f30944..d5b4ec19f 100644
--- a/scantext/templates/scantext/includes/license_summary_level.html
+++ b/scantext/templates/scantext/includes/license_summary_level.html
@@ -4,7 +4,7 @@
         <div>
             <p class="heading">Percentage Of License Text</p>
             <p class="title">
-              <span>{{ expr.percentage_of_license_text }}</span>
+              <span>{{ result.percentage_of_license_text }}</span>
             </p>
         </div>
     </div>
@@ -12,7 +12,7 @@
         <div>
             <p class="heading">License Expressions</p>
             <p class="title">
-              <span>{{ expr.license_expressions|length }}</span>
+              <span>{{ result.license_expressions|length }}</span>
             </p>
         </div>
     </div>
@@ -20,8 +20,8 @@
         <div>
             <p class="heading">Licenses</p>
             <p class="title">
-              <span>{{ expr.licenses|length }}</span>
+              <span>{{ result.licenses|length }}</span>
             </p>
         </div>
     </div>
-</nav>
\ No newline at end of file
+</nav>
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index 62bde5d95..ae932b83c 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -6,69 +6,26 @@
     {% include 'scanpipe/includes/navbar_header.html' %}
     <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
-    <section class="section p-0">
-        <div class="is-flex is-justify-content-space-between is-align-items-center mb-2">
-            <div class="mb-2">
-                <h1 class="title is-4">License Detection Summary</h1>
-            </div>
-            <div>
-                <a href="{% url 'license_scan' %}" class="button is-link">Scan Again</a>
-            </div>
+    <section class="mx-1 mb-0">
+        <div class="is-flex is-justify-content-space-between">
+            <h1 class="title is-4">License Detection Summary</h1>
+            <a href="{% url 'license_scan' %}" class="button is-link">New Scan</a>
         </div>
     </section>
 
-    <hr class="mx-5">
-    {% include "scantext/includes/license_summary_level.html" with expr=expr %}
-    <hr class="mx-5">
+    <hr class="mx-1 mt-0">
+    {% include 'scantext/includes/license_summary_level.html' with result=result %}
+    <hr class="mx-1">
 
-    <div class="columns mb-5">
+    <div class="columns mb-5 mx-1">
         <div class="column is-half">
-            <p class="title is-4">Input License Text</p>
-            <pre>{{ text }}</pre>
+            <p class="title is-5">Input License Text</p>
+            <textarea class="textarea has-fixed-size" rows="25" style="cursor: text;" disabled>{{ text }}</textarea>
         </div>
         <div class="column is-half">
-            <p class="title is-4">Detected Licenses</p>
+            <p class="title is-5">Detected Licenses</p>
             <div class="card">
-                {% for license in expr.licenses %}
-                <div class="licenses-card">
-                    <div class="card-header is-flex is-justify-content-space-between">
-                        <div class="card-header-title" title="{{ license.short_name }}">
-                          {% if license.homepage_url %}
-                            <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> 
-                          {% else %} {{ license.short_name }} {% endif %}
-                        </div>
-                        <div class="is-flex is-justify-content-row is-align-items-center">
-                            <p class="lines tag is-6 mx-1 is-light is-info">
-                              {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                            </p>
-                            <p class="tag is-6 mx-1 is-light 
-                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                            <p class="card-header-icon" aria-label="more options">
-                              <span class="icon">
-                                <i class="fas fa-angle-down" aria-hidden="true"></i>
-                              </span>
-                            </p>
-                        </div>
-                    </div>
-                    <div class="card-content is-hidden">
-                        <div class="content">
-                            <div class="is-flex is-justify-content-space-between">
-                                <div>
-                                    <p class="subtitle is-5">Matched Text</p>
-                                </div>
-                                <div>
-                                    <p class="tag is-6 mx-1 is-light is-primary">{{ license.category }}</p>
-                                    <a class="tag is-6 mx-1 is-light is-link" href="{{ license.reference_url }}">ref</a>
-                                    <p class="tag is-6 mx-1 is-light is-info">
-                                        {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                                    </p>
-                                </div>
-                            </div>
-                            <pre>{{ license.matched_text }}</pre>
-                        </div>
-                    </div>
-                </div>
-                {% endfor %}
+                {% include 'scantext/includes/license_detail_card.html' with result=result %}
             </div>
         </div>
     </div>
@@ -93,7 +50,6 @@ <h1 class="title is-4">License Detection Summary</h1>
           lineTag.remove('is-hidden')
         }
       })
-    })
-    
+    })  
 </script>
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/scantext/templates/scantext/license_scan.html b/scantext/templates/scantext/license_scan_form.html
similarity index 58%
rename from scantext/templates/scantext/license_scan.html
rename to scantext/templates/scantext/license_scan_form.html
index cb7d1a2ce..77fa980ee 100644
--- a/scantext/templates/scantext/license_scan.html
+++ b/scantext/templates/scantext/license_scan_form.html
@@ -6,14 +6,9 @@
     {% include 'scanpipe/includes/navbar_header.html' %}
     <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
-    <section class="section pt-0">
-        <div class="is-flex is-justify-content-space-between is-align-items-center mb-2">
-            <div class="mb-2">
-                <h1 class="title is-5">Scan License</h1>
-            </div>
-            <div>
-                <a href="{% url 'project_list' %}" class="button is-link">Go to Projects</a>
-            </div>
+    <section class="mx-5 mb-4">
+        <div class="mb-4">
+            <h1 class="title is-4">Scan License</h1>
         </div>
         <form method="post" action="{% url 'license_scan' %}">
             {% csrf_token %}
@@ -26,9 +21,9 @@ <h1 class="title is-5">Scan License</h1>
                       <span class="file-icon">
                         <i class="fas fa-upload"></i>
                       </span>
-                      <span class="file-label">Choose a file to scan…</span>
+                      <span class="file-label-text">Choose a file to scan…</span>
                     </span>
-                    <span class="file-name">/home/user/dev/license-text.txt</span>
+                    <span class="file-name is-hidden"></span>
                   </label>
                 </div>
                 <div class="column is-half">
@@ -38,13 +33,31 @@ <h1 class="title is-5">Scan License</h1>
         </form>
     </section>
 </div>
-{% endblock %} {% block scripts %}
+{% endblock %} 
+
+{% block scripts %}
 <script>
-    let form = document.querySelector('form');
+    const fileInput = document.querySelector('#id_input_file');
+    fileInput.onchange = updateFile;
+    
+    // Update the file name on upload
+    function updateFile() {
+      const fileName = document.querySelector('.file-name');
+      console.log(fileInput.files)
+      if (fileInput.files.length > 0) {
+        fileName.classList.remove('is-hidden')
+        fileName.innerHTML = fileInput.files[0].name;
+      }
+    }
+
+    // let form = document.querySelector('form');
 
     // form.addEventListener('submit', (event) => {
 
-    //   if (!form["id_input_file"].files.length && !form["id_input_text"].value.length) return false;
+    //   if (!form["id_input_file"].files.length && !form["id_input_text"].value.length) {
+    //     console.log("err");
+    //   }
     // });
+
 </script>
-{% endblock %} -->
\ No newline at end of file
+{% endblock %}

From d2bd8d05dbf92ea5fe15e7e486a53c08a4dbcc14 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Tue, 21 Jun 2022 00:24:32 +0530
Subject: [PATCH 05/59] Run License Detection On Text Submission with tempfile
 #450

* license_scanview function uses tempfile to run license detection on the
  provided input license text

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/apps.py  |  1 -
 scantext/views.py | 41 +++++++++++++++++++----------------------
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/scantext/apps.py b/scantext/apps.py
index 695cd08ca..9c100a202 100644
--- a/scantext/apps.py
+++ b/scantext/apps.py
@@ -24,5 +24,4 @@
 
 
 class ScantextConfig(AppConfig):
-    default_auto_field = "django.db.models.BigAutoField"
     name = "scantext"
diff --git a/scantext/views.py b/scantext/views.py
index 95e05f521..eb532d20f 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -20,7 +20,6 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
-import pprint
 import sys
 import tempfile
 
@@ -28,7 +27,7 @@
 from django.shortcuts import render
 from django.views import generic
 
-from scantext.forms import EditorForm
+from scantext.forms import LicenseForm
 
 SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
 SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
@@ -39,36 +38,34 @@
 
 
 def license_scanview(request):
-    form = EditorForm()
+    form = LicenseForm()
     if request.method == "POST":
-        form = EditorForm(request.POST)
+        form = LicenseForm(request.POST)
         if form.is_valid():
             text = form.cleaned_data["input_text"]
-            # license_location = tempfile.NamedTemporaryFile(mode="w", prefix="license_scan_", dir=settings.SCANCODEIO_WORKSPACE_LOCATION)
-            # with license_location as f:
-            #     f.write(text)
-            #     f.flush()
-            #     x=get_licenses(location=f)
-            #     f.close()
-            # the get_licenses in the above code (line 56) returns this error
-            # error:
-            # expected str, bytes or os.PathLike object, not _TemporaryFileWrapper
-            # the below code just works
-
-            expressions = get_licenses(
-                location="scantext/tests/data/LICENSES",
-                include_text=True,
-                license_text_diagnostics=True,
-            )
+            with tempfile.NamedTemporaryFile(
+                mode="w",
+                prefix="license_scan_",
+                dir=settings.SCANCODEIO_WORKSPACE_LOCATION,
+            ) as temp_file:
+                temp_file.write(text)
+                temp_file.flush()
+                expressions = get_licenses(
+                    location=temp_file.name,
+                    include_text=True,
+                    license_text_diagnostics=True,
+                )
+                temp_file.close()
+
             return render(
                 request,
                 "scantext/license_detail.html",
                 {
                     "text": text,
-                    "expr": expressions,
+                    "result": expressions,
                 },
             )
-    return render(request, "scantext/license_scan.html", {"form": form})
+    return render(request, "scantext/license_scan_form.html", {"form": form})
 
 
 def get_licenses(

From a113c142ff7ee5765a552d6972f92732e0d68960 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 12:01:53 +0530
Subject: [PATCH 06/59] Rename license form, prevent text stripping, temp_file
 attr update #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/forms.py |  3 ++-
 scantext/views.py | 10 ++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/scantext/forms.py b/scantext/forms.py
index 0f21c4dcb..d08cf74cc 100644
--- a/scantext/forms.py
+++ b/scantext/forms.py
@@ -23,8 +23,9 @@
 from django import forms
 
 
-class LicenseForm(forms.Form):
+class LicenseScanForm(forms.Form):
     input_text = forms.CharField(
+        strip=False,
         widget=forms.Textarea(
             attrs={
                 "rows": 15,
diff --git a/scantext/views.py b/scantext/views.py
index eb532d20f..55d700a63 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -27,7 +27,7 @@
 from django.shortcuts import render
 from django.views import generic
 
-from scantext.forms import LicenseForm
+from scantext.forms import LicenseScanForm
 
 SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
 SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
@@ -38,24 +38,22 @@
 
 
 def license_scanview(request):
-    form = LicenseForm()
+    form = LicenseScanForm()
     if request.method == "POST":
-        form = LicenseForm(request.POST)
+        form = LicenseScanForm(request.POST)
         if form.is_valid():
             text = form.cleaned_data["input_text"]
             with tempfile.NamedTemporaryFile(
                 mode="w",
                 prefix="license_scan_",
-                dir=settings.SCANCODEIO_WORKSPACE_LOCATION,
             ) as temp_file:
                 temp_file.write(text)
-                temp_file.flush()
                 expressions = get_licenses(
                     location=temp_file.name,
                     include_text=True,
                     license_text_diagnostics=True,
+                    unknown_licenses=True
                 )
-                temp_file.close()
 
             return render(
                 request,

From 0fa7fbba9ab170e930d72cb0f2cce5c11b0ac9b7 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 12:26:01 +0530
Subject: [PATCH 07/59] Detect unknown licenses and return matched text by
 default #450

* get_licenses detects unknown licenses by default
* view returns matched_text by default
* change variable attribute from `result` to `detected_licenses`

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>

---
 .../includes/license_detail_card.html         |  2 +-
 .../includes/license_summary_level.html       |  6 +--
 .../templates/scantext/license_detail.html    |  4 +-
 scantext/views.py                             | 40 +++++--------------
 4 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_detail_card.html b/scantext/templates/scantext/includes/license_detail_card.html
index 451ce8d5f..731ca2c3e 100644
--- a/scantext/templates/scantext/includes/license_detail_card.html
+++ b/scantext/templates/scantext/includes/license_detail_card.html
@@ -1,4 +1,4 @@
-{% for license in result.licenses %}
+{% for license in detected_licenses.licenses %}
 <div class="licenses-card">
     <div class="card-header is-flex is-justify-content-space-between">
         <div class="card-header-title" title="{{ license.short_name }}">
diff --git a/scantext/templates/scantext/includes/license_summary_level.html b/scantext/templates/scantext/includes/license_summary_level.html
index d5b4ec19f..67952188a 100644
--- a/scantext/templates/scantext/includes/license_summary_level.html
+++ b/scantext/templates/scantext/includes/license_summary_level.html
@@ -4,7 +4,7 @@
         <div>
             <p class="heading">Percentage Of License Text</p>
             <p class="title">
-              <span>{{ result.percentage_of_license_text }}</span>
+              <span>{{ detected_licenses.percentage_of_license_text }}</span>
             </p>
         </div>
     </div>
@@ -12,7 +12,7 @@
         <div>
             <p class="heading">License Expressions</p>
             <p class="title">
-              <span>{{ result.license_expressions|length }}</span>
+              <span>{{ detected_licenses.license_expressions|length }}</span>
             </p>
         </div>
     </div>
@@ -20,7 +20,7 @@
         <div>
             <p class="heading">Licenses</p>
             <p class="title">
-              <span>{{ result.licenses|length }}</span>
+              <span>{{ detected_licenses.licenses|length }}</span>
             </p>
         </div>
     </div>
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index ae932b83c..7d9b20fde 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -14,7 +14,7 @@ <h1 class="title is-4">License Detection Summary</h1>
     </section>
 
     <hr class="mx-1 mt-0">
-    {% include 'scantext/includes/license_summary_level.html' with result=result %}
+    {% include 'scantext/includes/license_summary_level.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
     <div class="columns mb-5 mx-1">
@@ -25,7 +25,7 @@ <h1 class="title is-4">License Detection Summary</h1>
         <div class="column is-half">
             <p class="title is-5">Detected Licenses</p>
             <div class="card">
-                {% include 'scantext/includes/license_detail_card.html' with result=result %}
+                {% include 'scantext/includes/license_detail_card.html' with detected_licenses=detected_licenses %}
             </div>
         </div>
     </div>
diff --git a/scantext/views.py b/scantext/views.py
index 55d700a63..2d7fc7669 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -50,17 +50,15 @@ def license_scanview(request):
                 temp_file.write(text)
                 expressions = get_licenses(
                     location=temp_file.name,
-                    include_text=True,
-                    license_text_diagnostics=True,
-                    unknown_licenses=True
                 )
 
+            print(expressions)
             return render(
                 request,
                 "scantext/license_detail.html",
                 {
                     "text": text,
-                    "result": expressions,
+                    "detected_licenses": expressions,
                 },
             )
     return render(request, "scantext/license_scan_form.html", {"form": form})
@@ -68,11 +66,7 @@ def license_scanview(request):
 
 def get_licenses(
     location,
-    min_score=0,
-    include_text=False,
-    license_text_diagnostics=False,
     license_url_template=SCANCODE_LICENSEDB_URL,
-    unknown_licenses=False,
     deadline=sys.maxsize,
     **kwargs,
 ):
@@ -86,12 +80,7 @@ def get_licenses(
     `min_score` is a minimum score threshold from 0 to 100. The default is 0,
     meaning that all license matches are returned. If specified, matches with a
     score lower than `minimum_score` are not returned.
-    If `include_text` is True, matched text is included in the returned
-    `licenses` data as well as a file-level `percentage_of_license_text` percentage to
-    indicate the overall proportion of detected license text and license notice
-    words in the file. This is used to determine if a file contains mostly
-    licensing information.
-    If ``unknown_licenses`` is True, also detect unknown licenses.
+    By Default ``unknown_licenses`` is set to True to detect unknown licenses.
     """
     from licensedcode import cache
     from licensedcode.spans import Span
@@ -103,9 +92,9 @@ def get_licenses(
 
     matches = idx.match(
         location=location,
-        min_score=min_score,
+        min_score=0,
         deadline=deadline,
-        unknown_licenses=unknown_licenses,
+        unknown_licenses=True,
         **kwargs,
     )
 
@@ -119,8 +108,6 @@ def get_licenses(
         detected_licenses.extend(
             _licenses_data_from_match(
                 match=match,
-                include_text=include_text,
-                license_text_diagnostics=license_text_diagnostics,
                 license_url_template=license_url_template,
             )
         )
@@ -147,8 +134,6 @@ def get_licenses(
 
 def _licenses_data_from_match(
     match,
-    include_text=False,
-    license_text_diagnostics=False,
     license_url_template=SCANCODE_LICENSEDB_URL,
 ):
     """
@@ -159,12 +144,10 @@ def _licenses_data_from_match(
 
     licenses = cache.get_licenses_db()
 
-    matched_text = None
-    if include_text:
-        if license_text_diagnostics:
-            matched_text = match.matched_text(whole_lines=False, highlight=True)
-        else:
-            matched_text = match.matched_text(whole_lines=True, highlight=False)
+    """
+    Returned matched_text will also include the text detected 
+    """
+    matched_text = match.matched_text(whole_lines=False, highlight=True)
 
     detected_licenses = []
     for license_key in match.rule.license_keys():
@@ -200,6 +183,7 @@ def _licenses_data_from_match(
         result["spdx_url"] = spdx_url
         result["start_line"] = match.start_line
         result["end_line"] = match.end_line
+        result["matched_text"] = matched_text
         matched_rule = result["matched_rule"] = {}
         matched_rule["identifier"] = match.rule.identifier
         matched_rule["license_expression"] = match.rule.license_expression
@@ -216,7 +200,5 @@ def _licenses_data_from_match(
         matched_rule["matched_length"] = match.len()
         matched_rule["match_coverage"] = match.coverage()
         matched_rule["rule_relevance"] = match.rule.relevance
-        # FIXME: for sanity this should always be included?????
-        if include_text:
-            result["matched_text"] = matched_text
+
     return detected_licenses

From ea6f4bbe88a63abf66d5ff91b6352059549b8769 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 16:21:19 +0530
Subject: [PATCH 08/59] Flush contents of the tempfile before passing as an
 argument #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scantext/views.py b/scantext/views.py
index 2d7fc7669..97bbead9b 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -48,11 +48,12 @@ def license_scanview(request):
                 prefix="license_scan_",
             ) as temp_file:
                 temp_file.write(text)
+                temp_file.flush()
                 expressions = get_licenses(
                     location=temp_file.name,
                 )
+                temp_file.close()
 
-            print(expressions)
             return render(
                 request,
                 "scantext/license_detail.html",

From 947faafa3c5443161b667044ebb05031b698866b Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 16:21:19 +0530
Subject: [PATCH 09/59] Flush contents of the tempfile before passing as an
 argument #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scantext/views.py b/scantext/views.py
index 2d7fc7669..97bbead9b 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -48,11 +48,12 @@ def license_scanview(request):
                 prefix="license_scan_",
             ) as temp_file:
                 temp_file.write(text)
+                temp_file.flush()
                 expressions = get_licenses(
                     location=temp_file.name,
                 )
+                temp_file.close()
 
-            print(expressions)
             return render(
                 request,
                 "scantext/license_detail.html",

From 60af91322ccf30f89142734c4c91563d2d736808 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 18:51:24 +0530
Subject: [PATCH 10/59] Deleted models.py and migrations #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/migrations/__init__.py |  0
 scantext/models.py              | 25 -------------------------
 2 files changed, 25 deletions(-)
 delete mode 100644 scantext/migrations/__init__.py
 delete mode 100644 scantext/models.py

diff --git a/scantext/migrations/__init__.py b/scantext/migrations/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/scantext/models.py b/scantext/models.py
deleted file mode 100644
index 458130fff..000000000
--- a/scantext/models.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-#
-# http://nexb.com and https://github.com/nexB/scancode.io
-# The ScanCode.io software is licensed under the Apache License version 2.0.
-# Data generated with ScanCode.io is provided as-is without warranties.
-# ScanCode is a trademark of nexB Inc.
-#
-# You may not use this software except in compliance with the License.
-# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software distributed
-# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
-# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
-# ScanCode.io should be considered or used as legal advice. Consult an Attorney
-# for any legal advice.
-#
-# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
-# Visit https://github.com/nexB/scancode.io for support and download.
-
-from django.db import models
-
-# Create your models here.

From 28d0dcf54bc753d88dc8c369290c80398cb2b161 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 19:16:31 +0530
Subject: [PATCH 11/59] Added comment related to flushing tempfile #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 97bbead9b..34724588b 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -43,22 +43,23 @@ def license_scanview(request):
         form = LicenseScanForm(request.POST)
         if form.is_valid():
             text = form.cleaned_data["input_text"]
+            """
+            The flush in tempfile is required to ensure that the content is written to the disk before it's read by get_licenses function
+            """
             with tempfile.NamedTemporaryFile(
                 mode="w",
-                prefix="license_scan_",
             ) as temp_file:
                 temp_file.write(text)
                 temp_file.flush()
                 expressions = get_licenses(
                     location=temp_file.name,
                 )
-                temp_file.close()
 
             return render(
                 request,
                 "scantext/license_detail.html",
                 {
-                    "text": text,
+                    "text": text.split('\n'),
                     "detected_licenses": expressions,
                 },
             )
@@ -146,7 +147,7 @@ def _licenses_data_from_match(
     licenses = cache.get_licenses_db()
 
     """
-    Returned matched_text will also include the text detected 
+    Returned matched_text will also include the text detected
     """
     matched_text = match.matched_text(whole_lines=False, highlight=True)
 

From e46107aec86004519b9f6c4d3a70e5595d006788 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 22:41:39 +0530
Subject: [PATCH 12/59] Commenting inside views with pound over docstring #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 34724588b..000916bcd 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -43,9 +43,7 @@ def license_scanview(request):
         form = LicenseScanForm(request.POST)
         if form.is_valid():
             text = form.cleaned_data["input_text"]
-            """
-            The flush in tempfile is required to ensure that the content is written to the disk before it's read by get_licenses function
-            """
+            # The flush in tempfile is required to ensure that the content is written to the disk before it's read by get_licenses function
             with tempfile.NamedTemporaryFile(
                 mode="w",
             ) as temp_file:
@@ -146,9 +144,7 @@ def _licenses_data_from_match(
 
     licenses = cache.get_licenses_db()
 
-    """
-    Returned matched_text will also include the text detected
-    """
+    # Returned matched_text will also include the text detected
     matched_text = match.matched_text(whole_lines=False, highlight=True)
 
     detected_licenses = []

From 44680854dc74ba2c5cb71800974fc6ff6eacdeae Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 22:56:57 +0530
Subject: [PATCH 13/59] Write comments in multiple lines inside views #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 000916bcd..8db4f4bfc 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -43,7 +43,8 @@ def license_scanview(request):
         form = LicenseScanForm(request.POST)
         if form.is_valid():
             text = form.cleaned_data["input_text"]
-            # The flush in tempfile is required to ensure that the content is written to the disk before it's read by get_licenses function
+            # The flush in tempfile is required to ensure that the content is
+            # written to the disk before it's read by get_licenses function
             with tempfile.NamedTemporaryFile(
                 mode="w",
             ) as temp_file:
@@ -57,7 +58,7 @@ def license_scanview(request):
                 request,
                 "scantext/license_detail.html",
                 {
-                    "text": text.split('\n'),
+                    "text": text.split("\n"),
                     "detected_licenses": expressions,
                 },
             )

From 8e7c72bcb819d39c20dc607229210b5311bd20b6 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 22 Jun 2022 23:16:47 +0530
Subject: [PATCH 14/59] Render input text inside a box instead of the textarea
 #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/templates/scantext/license_detail.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index 7d9b20fde..fc12e2773 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -20,7 +20,7 @@ <h1 class="title is-4">License Detection Summary</h1>
     <div class="columns mb-5 mx-1">
         <div class="column is-half">
             <p class="title is-5">Input License Text</p>
-            <textarea class="textarea has-fixed-size" rows="25" style="cursor: text;" disabled>{{ text }}</textarea>
+	    <div class="box" style="height: 70vh; overflow-y: scroll; white-space: pre-wrap;">{% for line in text %}<span>{{ line }}</span>{% endfor %}</div>
         </div>
         <div class="column is-half">
             <p class="title is-5">Detected Licenses</p>

From 91ca751f5270b67a35059c6d68abc134377796d7 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 27 Jun 2022 13:18:54 +0530
Subject: [PATCH 15/59] Display form errors, detect licenses for input text
 file #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_detail_card.html         |  6 +-
 .../templates/scantext/license_detail.html    |  6 +-
 .../templates/scantext/license_scan_form.html | 16 ++++-
 scantext/views.py                             | 59 +++++++++++++++----
 4 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_detail_card.html b/scantext/templates/scantext/includes/license_detail_card.html
index 731ca2c3e..a546039bd 100644
--- a/scantext/templates/scantext/includes/license_detail_card.html
+++ b/scantext/templates/scantext/includes/license_detail_card.html
@@ -32,7 +32,11 @@
                     </p>
                 </div>
             </div>
-            <textarea class="textarea has-fixed-size" rows="15" style="cursor: text;" disabled>{{ license.matched_text }}</textarea>
+            <article class="message is-dark" style="max-height: 30vh; overflow-y: scroll;">
+                <div class="message-body" style="white-space: pre-wrap;">
+                    {{ license.matched_text }}
+                </div>
+            </article>
         </div>
     </div>
 </div>
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
index fc12e2773..2cce30c4e 100644
--- a/scantext/templates/scantext/license_detail.html
+++ b/scantext/templates/scantext/license_detail.html
@@ -20,13 +20,13 @@ <h1 class="title is-4">License Detection Summary</h1>
     <div class="columns mb-5 mx-1">
         <div class="column is-half">
             <p class="title is-5">Input License Text</p>
-	    <div class="box" style="height: 70vh; overflow-y: scroll; white-space: pre-wrap;">{% for line in text %}<span>{{ line }}</span>{% endfor %}</div>
+	       <div class="box" style="max-height: 68vh; overflow-y: scroll; white-space: pre-wrap;">{% for line in text %}<span >{{ line }}</span>{% endfor %}</div>
         </div>
         <div class="column is-half">
             <p class="title is-5">Detected Licenses</p>
             <div class="card">
-                {% include 'scantext/includes/license_detail_card.html' with detected_licenses=detected_licenses %}
-            </div>
+                {% include 'scantext/includes/license_detail_card.html' with detected_licenses=detected_licenses %}     
+            </div>       
         </div>
     </div>
 
diff --git a/scantext/templates/scantext/license_scan_form.html b/scantext/templates/scantext/license_scan_form.html
index 77fa980ee..5619ffe64 100644
--- a/scantext/templates/scantext/license_scan_form.html
+++ b/scantext/templates/scantext/license_scan_form.html
@@ -10,7 +10,19 @@
         <div class="mb-4">
             <h1 class="title is-4">Scan License</h1>
         </div>
-        <form method="post" action="{% url 'license_scan' %}">
+
+        <article class="message is-warning {% if not input_error %}is-hidden{% endif %}">
+            <div class="message-body">
+                {{ input_error }}
+            </div>
+        </article>
+        <article class="message is-info {% if not detection_message %}is-hidden{% endif %}">
+            <div class="message-body">
+                {{ detection_message }}
+            </div>
+        </article>
+
+        <form action="{% url 'license_scan' %}" method="post" enctype="multipart/form-data">
             {% csrf_token %}
             {{ form.input_text }}
             <div class="columns mt-1">
@@ -21,7 +33,7 @@ <h1 class="title is-4">Scan License</h1>
                       <span class="file-icon">
                         <i class="fas fa-upload"></i>
                       </span>
-                      <span class="file-label-text">Choose a file to scan…</span>
+                      <span class="file-label-text">Choose a text file to scan…</span>
                     </span>
                     <span class="file-name is-hidden"></span>
                   </label>
diff --git a/scantext/views.py b/scantext/views.py
index 8db4f4bfc..fa0090537 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -40,25 +40,64 @@
 def license_scanview(request):
     form = LicenseScanForm()
     if request.method == "POST":
-        form = LicenseScanForm(request.POST)
+        form = LicenseScanForm(request.POST, request.FILES)
         if form.is_valid():
-            text = form.cleaned_data["input_text"]
+            input_text = form.cleaned_data["input_text"]
+            input_file = request.FILES.get("input_file", False)
+            if not len(input_text) and not input_file:
+                message = "Please provide some text or a text file to scan."
+                return render(
+                    request,
+                    "scantext/license_scan_form.html",
+                    {
+                        "form": LicenseScanForm(),
+                        "input_error": message,
+                    },
+                )
+
             # The flush in tempfile is required to ensure that the content is
             # written to the disk before it's read by get_licenses function
-            with tempfile.NamedTemporaryFile(
-                mode="w",
-            ) as temp_file:
-                temp_file.write(text)
-                temp_file.flush()
-                expressions = get_licenses(
-                    location=temp_file.name,
+            if len(input_text):
+                with tempfile.NamedTemporaryFile(
+                    mode="w",
+                ) as temp_file:
+                    temp_file.write(input_text)
+                    temp_file.flush()
+                    expressions = get_licenses(
+                        location=temp_file.name,
+                    )
+            elif input_file:
+                # import typecode
+                # print(typecode.contenttype.magic2.mime_type())
+                # the below code only works for text files and doesnot check the file type
+                with tempfile.NamedTemporaryFile(
+                    mode="w",
+                ) as temp_file:
+                    input_text = str(input_file.read(), "UTF-8")
+                    temp_file.write(input_text)
+                    temp_file.flush()
+                    expressions = get_licenses(
+                        location=temp_file.name,
+                    )
+
+            if not len(expressions["licenses"]) and not len(
+                expressions["license_expressions"]
+            ):
+                message = "Couldn't detect any license from the provided input."
+                return render(
+                    request,
+                    "scantext/license_scan_form.html",
+                    {
+                        "form": form,
+                        "detection_message": message,
+                    },
                 )
 
             return render(
                 request,
                 "scantext/license_detail.html",
                 {
-                    "text": text.split("\n"),
+                    "text": input_text.split("\n"),
                     "detected_licenses": expressions,
                 },
             )

From cfb7a37d28af2e4a16a285690417f012b9edc12d Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 1 Jul 2022 16:15:52 +0530
Subject: [PATCH 16/59] Improve file error response in views #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../templates/scantext/license_scan_form.html | 26 +-------
 scantext/views.py                             | 66 ++++++++++---------
 2 files changed, 38 insertions(+), 54 deletions(-)

diff --git a/scantext/templates/scantext/license_scan_form.html b/scantext/templates/scantext/license_scan_form.html
index 5619ffe64..0b62ef259 100644
--- a/scantext/templates/scantext/license_scan_form.html
+++ b/scantext/templates/scantext/license_scan_form.html
@@ -1,5 +1,5 @@
-{% extends "scanpipe/base.html" %} 
-{% load static humanize %} 
+{% extends "scanpipe/base.html" %}
+{% load static humanize %}
 
 {% block content %}
 <div class="container is-max-desktop">
@@ -11,17 +11,6 @@
             <h1 class="title is-4">Scan License</h1>
         </div>
 
-        <article class="message is-warning {% if not input_error %}is-hidden{% endif %}">
-            <div class="message-body">
-                {{ input_error }}
-            </div>
-        </article>
-        <article class="message is-info {% if not detection_message %}is-hidden{% endif %}">
-            <div class="message-body">
-                {{ detection_message }}
-            </div>
-        </article>
-
         <form action="{% url 'license_scan' %}" method="post" enctype="multipart/form-data">
             {% csrf_token %}
             {{ form.input_text }}
@@ -45,7 +34,7 @@ <h1 class="title is-4">Scan License</h1>
         </form>
     </section>
 </div>
-{% endblock %} 
+{% endblock %}
 
 {% block scripts %}
 <script>
@@ -62,14 +51,5 @@ <h1 class="title is-4">Scan License</h1>
       }
     }
 
-    // let form = document.querySelector('form');
-
-    // form.addEventListener('submit', (event) => {
-
-    //   if (!form["id_input_file"].files.length && !form["id_input_text"].value.length) {
-    //     console.log("err");
-    //   }
-    // });
-
 </script>
 {% endblock %}
diff --git a/scantext/views.py b/scantext/views.py
index fa0090537..a59cd66b7 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -24,12 +24,14 @@
 import tempfile
 
 from django.conf import settings
+from django.contrib import messages
 from django.shortcuts import render
 from django.views import generic
 
 from scantext.forms import LicenseScanForm
 
-SCANCODE_BASE_URL = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses"
+SCANCODE_REPO_URL = "https://github.com/nexB/scancode-toolkit"
+SCANCODE_BASE_URL = SCANCODE_REPO_URL + "/tree/develop/src/licensedcode/data/licenses"
 SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
 SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
 SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
@@ -46,56 +48,60 @@ def license_scanview(request):
             input_file = request.FILES.get("input_file", False)
             if not len(input_text) and not input_file:
                 message = "Please provide some text or a text file to scan."
+                messages.warning(request, message)
                 return render(
                     request,
                     "scantext/license_scan_form.html",
                     {
                         "form": LicenseScanForm(),
-                        "input_error": message,
                     },
                 )
 
             # The flush in tempfile is required to ensure that the content is
             # written to the disk before it's read by get_licenses function
             if len(input_text):
-                with tempfile.NamedTemporaryFile(
-                    mode="w",
-                ) as temp_file:
+                with tempfile.NamedTemporaryFile(mode="w") as temp_file:
                     temp_file.write(input_text)
                     temp_file.flush()
                     expressions = get_licenses(
                         location=temp_file.name,
                     )
             elif input_file:
-                # import typecode
-                # print(typecode.contenttype.magic2.mime_type())
-                # the below code only works for text files and doesnot check the file type
-                with tempfile.NamedTemporaryFile(
-                    mode="w",
-                ) as temp_file:
-                    input_text = str(input_file.read(), "UTF-8")
-                    temp_file.write(input_text)
-                    temp_file.flush()
-                    expressions = get_licenses(
-                        location=temp_file.name,
+                try:
+                    with tempfile.NamedTemporaryFile(mode="w") as temp_file:
+                        input_text = str(input_file.read(), "UTF-8")
+                        temp_file.write(input_text)
+                        temp_file.flush()
+                        expressions = get_licenses(
+                            location=temp_file.name,
+                        )
+                except UnicodeDecodeError:
+                    message = "Please upload a valid text file."
+                    messages.warning(request, message)
+                    return render(
+                        request,
+                        "scantext/license_scan_form.html",
+                        {
+                            "form": LicenseScanForm(),
+                        },
                     )
 
-            if not len(expressions["licenses"]) and not len(
-                expressions["license_expressions"]
-            ):
-                message = "Couldn't detect any license from the provided input."
-                return render(
-                    request,
-                    "scantext/license_scan_form.html",
-                    {
-                        "form": form,
-                        "detection_message": message,
-                    },
-                )
+            if not len(expressions["licenses"]):
+                if not len(expressions["license_expressions"]):
+                    message = "Couldn't detect any license from the provided input."
+                    messages.info(request, message)
+                    return render(
+                        request,
+                        "scantext/license_summary.html",
+                        {
+                            "text": input_text.split("\n"),
+                            "detected_licenses": expressions,
+                        },
+                    )
 
             return render(
                 request,
-                "scantext/license_detail.html",
+                "scantext/license_summary.html",
                 {
                     "text": input_text.split("\n"),
                     "detected_licenses": expressions,
@@ -161,12 +167,10 @@ def get_licenses(
             (matched_tokens_length / query_tokens_length) * 100, 2
         )
 
-    detected_spdx_expressions = []
     return dict(
         [
             ("licenses", detected_licenses),
             ("license_expressions", detected_expressions),
-            ("spdx_license_expressions", detected_spdx_expressions),
             ("percentage_of_license_text", percentage_of_license_text),
         ]
     )

From 7eedf84cb7ba3916fad29b3c2a8db7eb3777a644 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 1 Jul 2022 16:20:10 +0530
Subject: [PATCH 17/59] Moved license summary to details with new UI #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../scantext/includes/license_detail.html     | 48 ++++++++++++
 ...l_card.html => license_summary_cards.html} |  8 +-
 ...level.html => license_summary_header.html} |  0
 .../templates/scantext/license_detail.html    | 55 --------------
 .../templates/scantext/license_summary.html   | 76 +++++++++++++++++++
 5 files changed, 127 insertions(+), 60 deletions(-)
 create mode 100644 scantext/templates/scantext/includes/license_detail.html
 rename scantext/templates/scantext/includes/{license_detail_card.html => license_summary_cards.html} (80%)
 rename scantext/templates/scantext/includes/{license_summary_level.html => license_summary_header.html} (100%)
 delete mode 100644 scantext/templates/scantext/license_detail.html
 create mode 100644 scantext/templates/scantext/license_summary.html

diff --git a/scantext/templates/scantext/includes/license_detail.html b/scantext/templates/scantext/includes/license_detail.html
new file mode 100644
index 000000000..9b6935c2b
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_detail.html
@@ -0,0 +1,48 @@
+<!-- code for the sticky divs in new ui -->
+<div class="info columns mb-5 mx-1">
+    <div class="column is-one-third">
+        <p class="title is-5">Detected Licenses</p>
+    </div>
+    <div class="column is-two-third">
+        <p class="title is-5">Matched Text</p>
+    </div>
+</div>
+
+{% for license in detected_licenses.licenses %}
+<div class="columns mb-5 mx-1">
+    <div class="column is-one-third mb-2">
+        <div id="sticky">
+            <div class="mb-2 is-link mx-1">
+                <a class="title is-5" href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})"> {{ license.short_name }} <i class="fa fa-link is-link"></i></a>
+            </div>
+            <div class="is-flex">
+                <p class="tag is-6 mr-2 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                <p class="tag is-6 mr-2 is-light is-primary">{{ license.category }}</p>
+                <a class="tag is-6 mr-2 is-light is-link" href="{{ license.reference_url }}">ref</a>
+                <p class="tag is-6 mr-2 is-light is-info">
+                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                </p>
+            </div>
+        </div>
+    </div>
+    <div class="column is-two-third">
+        <article class="message is-info">
+            <div class="message-header is-hidden"></div>
+            <div class="message-body" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
+                {{ license.matched_text }}
+            </div>
+        </article>
+    </div>
+</div>
+<hr class="mx-1 mt-0">
+
+<style type="text/css">
+    #sticky {
+        position: sticky;
+        position: -webkit-sticky;
+        margin-top: 1vh;
+        top:  4vh;
+    }
+
+</style>
+{% endfor %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_detail_card.html b/scantext/templates/scantext/includes/license_summary_cards.html
similarity index 80%
rename from scantext/templates/scantext/includes/license_detail_card.html
rename to scantext/templates/scantext/includes/license_summary_cards.html
index a546039bd..f9df2aefe 100644
--- a/scantext/templates/scantext/includes/license_detail_card.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -27,13 +27,11 @@
                 <div>
                     <p class="tag is-6 mx-1 is-light is-primary">{{ license.category }}</p>
                     <a class="tag is-6 mx-1 is-light is-link" href="{{ license.reference_url }}">ref</a>
-                    <p class="tag is-6 mx-1 is-light is-info">
-                        {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                    </p>
+                    <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
                 </div>
             </div>
-            <article class="message is-dark" style="max-height: 30vh; overflow-y: scroll;">
-                <div class="message-body" style="white-space: pre-wrap;">
+            <article class="message is-dark">
+                <div class="message-body" style="max-height: 30vh; overflow-y: scroll; white-space: pre-wrap;">
                     {{ license.matched_text }}
                 </div>
             </article>
diff --git a/scantext/templates/scantext/includes/license_summary_level.html b/scantext/templates/scantext/includes/license_summary_header.html
similarity index 100%
rename from scantext/templates/scantext/includes/license_summary_level.html
rename to scantext/templates/scantext/includes/license_summary_header.html
diff --git a/scantext/templates/scantext/license_detail.html b/scantext/templates/scantext/license_detail.html
deleted file mode 100644
index 2cce30c4e..000000000
--- a/scantext/templates/scantext/license_detail.html
+++ /dev/null
@@ -1,55 +0,0 @@
-{% extends 'scanpipe/base.html' %} 
-{% load static humanize %} 
-
-{% block content %}
-<div class="container is-widescreen">
-    {% include 'scanpipe/includes/navbar_header.html' %}
-    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
-
-    <section class="mx-1 mb-0">
-        <div class="is-flex is-justify-content-space-between">
-            <h1 class="title is-4">License Detection Summary</h1>
-            <a href="{% url 'license_scan' %}" class="button is-link">New Scan</a>
-        </div>
-    </section>
-
-    <hr class="mx-1 mt-0">
-    {% include 'scantext/includes/license_summary_level.html' with detected_licenses=detected_licenses %}
-    <hr class="mx-1">
-
-    <div class="columns mb-5 mx-1">
-        <div class="column is-half">
-            <p class="title is-5">Input License Text</p>
-	       <div class="box" style="max-height: 68vh; overflow-y: scroll; white-space: pre-wrap;">{% for line in text %}<span >{{ line }}</span>{% endfor %}</div>
-        </div>
-        <div class="column is-half">
-            <p class="title is-5">Detected Licenses</p>
-            <div class="card">
-                {% include 'scantext/includes/license_detail_card.html' with detected_licenses=detected_licenses %}     
-            </div>       
-        </div>
-    </div>
-
-</div>
-{% endblock %} 
-
-{% block scripts %}
-<script type="text/javascript">
-    const cards = document.querySelectorAll('.card-header-icon')
-    
-    cards.forEach(card => {
-      card.addEventListener('click', (event) => {
-        event.preventDefault()
-        content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
-        lineTag= card.parentElement.querySelector('.lines').classList
-        if (content.contains('is-hidden')) {
-          content.remove('is-hidden')
-          lineTag.add('is-hidden')
-        } else {
-          content.add('is-hidden')
-          lineTag.remove('is-hidden')
-        }
-      })
-    })  
-</script>
-{% endblock %}
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
new file mode 100644
index 000000000..52622d53e
--- /dev/null
+++ b/scantext/templates/scantext/license_summary.html
@@ -0,0 +1,76 @@
+{% extends 'scanpipe/base.html' %}
+{% load static humanize %}
+
+{% block content %}
+<div class="container is-widescreen">
+    {% include 'scanpipe/includes/navbar_header.html' %}
+    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
+
+    <section class="mx-5 mb-0">
+        <div class="is-flex is-justify-content-space-between">
+            <h1 class="title is-4">License Detection Summary</h1>
+            <a href="{% url 'license_scan' %}" class="button is-link">New Scan</a>
+        </div>
+    </section>
+
+    <!-- still under implementation -->
+    <div class="tabs">
+      <ul>
+        <li class="is-active"><a>Summary</a></li>
+        <li><a>Details</a></li>
+      </ul>
+    </div>
+
+    {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
+    <hr class="mx-1">
+
+    <div class="info columns mb-5 mx-1">
+        <div class="column is-half">
+           <p class="title is-5">Input License Text</p>
+	       <div class="box" style="max-height: 70vh; overflow: scroll; white-space: pre-wrap;">{% for line in text %}<span class="is-block">{{ line }}</span>{% endfor %}</div>
+        </div>
+        <div class="column is-half">
+            <p class="title is-5">Matched Text</p>
+            <div class="card">
+                {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
+            </div>
+        </div>
+    </div>
+
+    {% include 'scantext/includes/license_detail.html' with detected_licenses=detected_licenses %}
+</div>
+{% endblock %}
+
+{% block scripts %}
+<script src="{% static 'billboard-3.0.1.pkgd.min.js' %}" crossorigin="anonymous"></script>
+<script type="text/javascript">
+    const cards = document.querySelectorAll('.card-header-icon')
+    
+    document.querySelectorAll('li').forEach(li => {
+        li.addEventListener('click', e => {
+         if (!li.classList.contains('is-active')) {
+            document.querySelectorAll('li').forEach(listItem => {
+                listItem.classList.remove('is-active')
+            })
+            li.classList.add('is-active')
+         }
+        })
+    })
+
+    cards.forEach(card => {
+      card.addEventListener('click', (event) => {
+        event.preventDefault()
+        content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
+        lineTag= card.parentElement.querySelector('.lines').classList
+        if (content.contains('is-hidden')) {
+          content.remove('is-hidden')
+          lineTag.add('is-hidden')
+        } else {
+          content.add('is-hidden')
+          lineTag.remove('is-hidden')
+        }
+      })
+    })
+
+</script>
+{% endblock %}

From 2d4f7f6f5fa25ef980374a611fce577516734b4d Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 1 Jul 2022 16:27:20 +0530
Subject: [PATCH 18/59] Fixed broken short lines #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index a59cd66b7..d9596bd9e 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -63,18 +63,14 @@ def license_scanview(request):
                 with tempfile.NamedTemporaryFile(mode="w") as temp_file:
                     temp_file.write(input_text)
                     temp_file.flush()
-                    expressions = get_licenses(
-                        location=temp_file.name,
-                    )
+                    expressions = get_licenses(location=temp_file.name)
             elif input_file:
                 try:
                     with tempfile.NamedTemporaryFile(mode="w") as temp_file:
                         input_text = str(input_file.read(), "UTF-8")
                         temp_file.write(input_text)
                         temp_file.flush()
-                        expressions = get_licenses(
-                            location=temp_file.name,
-                        )
+                        expressions = get_licenses(location=temp_file.name)
                 except UnicodeDecodeError:
                     message = "Please upload a valid text file."
                     messages.warning(request, message)

From bbe58d002f2001a8ecbac54b3236a0ebb2d098b6 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 1 Jul 2022 16:33:13 +0530
Subject: [PATCH 19/59] New line at the end of the page #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/templates/scantext/includes/license_detail.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scantext/templates/scantext/includes/license_detail.html b/scantext/templates/scantext/includes/license_detail.html
index 9b6935c2b..3e9280b55 100644
--- a/scantext/templates/scantext/includes/license_detail.html
+++ b/scantext/templates/scantext/includes/license_detail.html
@@ -45,4 +45,4 @@
     }
 
 </style>
-{% endfor %}
\ No newline at end of file
+{% endfor %}

From a19a7621301ac1b2cbab5921aa3e33e44ece2dce Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 4 Jul 2022 17:46:37 +0530
Subject: [PATCH 20/59] Ace Editor Restored with improved UI #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       | 47 +++++++++++-----
 .../templates/scantext/license_summary.html   | 53 ++++++++++++++-----
 scantext/views.py                             |  5 +-
 3 files changed, 76 insertions(+), 29 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index f9df2aefe..4906a2bdf 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -1,6 +1,6 @@
 {% for license in detected_licenses.licenses %}
 <div class="licenses-card">
-    <div class="card-header is-flex is-justify-content-space-between">
+    <div class="card-header is-flex is-justify-content-space-between pr-2">
         <div class="card-header-title" title="{{ license.short_name }}">
             {% if license.homepage_url %}
             <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
@@ -11,30 +11,49 @@
             </p>
             <p class="tag is-6 mx-1 is-light 
                   {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-            <p class="card-header-icon" aria-label="more options">
-                <span class="icon">
-                    <i class="fas fa-angle-down" aria-hidden="true"></i>
-                </span>
-            </p>
+            <p class="card-header-icon tag is-4 is-info" aria-label="more options" data-text="{{ license.matched_text }}">View</p>
         </div>
     </div>
     <div class="card-content is-hidden">
         <div class="content">
             <div class="is-flex is-justify-content-space-between">
                 <div>
-                    <p class="subtitle is-5">Matched Text</p>
+                    <p class="title is-5">Details</p>
                 </div>
                 <div>
-                    <p class="tag is-6 mx-1 is-light is-primary">{{ license.category }}</p>
-                    <a class="tag is-6 mx-1 is-light is-link" href="{{ license.reference_url }}">ref</a>
                     <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
                 </div>
             </div>
-            <article class="message is-dark">
-                <div class="message-body" style="max-height: 30vh; overflow-y: scroll; white-space: pre-wrap;">
-                    {{ license.matched_text }}
-                </div>
-            </article>
+            <div class="table-container mt-3">
+              <table class="table is-striped is-narrow is-hoverable is-fullwidth">
+                <tr>
+                    <td><strong>Name</strong></td>
+                    <td>{{ license.name }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Score</strong></td>
+                    <td>{{ license.score }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Owner</strong></td>
+                    <td>{{ license.owner }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Category</strong></td>
+                    <td>
+                        <p class="tag is-4 is-light is-primary">{{ license.category }}</p>
+                    </td>
+                </tr>
+                <tr>
+                    <td><strong>SPDX Key</strong></td>
+                    <td>{{ license.spdx_license_key }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Reference</strong></td>
+                    <td>{{ license.reference_url }}</td>
+                </tr>
+              </table>
+            </div>
         </div>
     </div>
 </div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 52622d53e..b19a71868 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -25,12 +25,15 @@ <h1 class="title is-4">License Detection Summary</h1>
     <hr class="mx-1">
 
     <div class="info columns mb-5 mx-1">
-        <div class="column is-half">
-           <p class="title is-5">Input License Text</p>
-	       <div class="box" style="max-height: 70vh; overflow: scroll; white-space: pre-wrap;">{% for line in text %}<span class="is-block">{{ line }}</span>{% endfor %}</div>
+        <div class="column is-two-third">
+            <div class="is-flex is-justify-content-space-between mb-2">
+                <p class="main-txt-view title is-5">Input License Text</p>
+                <button class="main-txt-btn button is-info is-hidden" data-text="{{ text }}" >View Input License</button>
+            </div>
+            <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
         </div>
-        <div class="column is-half">
-            <p class="title is-5">Matched Text</p>
+        <div class="column is-one-third">
+            <p class="title is-5">Detected Licenses</p>
             <div class="card">
                 {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
             </div>
@@ -42,12 +45,27 @@ <h1 class="title is-4">License Detection Summary</h1>
 {% endblock %}
 
 {% block scripts %}
-<script src="{% static 'billboard-3.0.1.pkgd.min.js' %}" crossorigin="anonymous"></script>
+<script src="{% static 'ace-1.4.12.min.js' %}" crossorigin="anonymous"></script>
 <script type="text/javascript">
+    let editor = ace.edit("editor", {
+        mode: "ace/mode/text",
+        autoScrollEditorIntoView: true,
+        wrap: false,
+        readOnly: true,
+        showPrintMargin: false,
+        highlightActiveLine: false,
+        highlightGutterLine: false,
+        fontSize: 15,
+        foldStyle: "manual",
+        fontFamily: "SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace",
+    });
+
     const cards = document.querySelectorAll('.card-header-icon')
-    
+    let mainTxt = document.querySelector('.main-txt-view')
+    const viewTxt = document.querySelector('.main-txt-btn')
+
     document.querySelectorAll('li').forEach(li => {
-        li.addEventListener('click', e => {
+        li.addEventListener('click', (event) => {
          if (!li.classList.contains('is-active')) {
             document.querySelectorAll('li').forEach(listItem => {
                 listItem.classList.remove('is-active')
@@ -57,18 +75,29 @@ <h1 class="title is-4">License Detection Summary</h1>
         })
     })
 
+    viewTxt.addEventListener('click', (event) => {
+        editor.setValue(viewTxt.getAttribute('data-text'))
+        mainTxt.innerHTML = "Input License Text"
+    })
+
     cards.forEach(card => {
       card.addEventListener('click', (event) => {
         event.preventDefault()
         content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
         lineTag= card.parentElement.querySelector('.lines').classList
+        mainTxt.innerHTML = "Matched Text"
+        if (viewTxt.classList.contains('is-hidden')) {
+            viewTxt.classList.remove('is-hidden')
+        }
+        editor.setValue(card.getAttribute('data-text'))
         if (content.contains('is-hidden')) {
-          content.remove('is-hidden')
-          lineTag.add('is-hidden')
+            content.remove('is-hidden')
+            lineTag.add('is-hidden')
         } else {
-          content.add('is-hidden')
-          lineTag.remove('is-hidden')
+            content.add('is-hidden')
+            lineTag.remove('is-hidden')
         }
+
       })
     })
 
diff --git a/scantext/views.py b/scantext/views.py
index d9596bd9e..026d43b8f 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -26,7 +26,6 @@
 from django.conf import settings
 from django.contrib import messages
 from django.shortcuts import render
-from django.views import generic
 
 from scantext.forms import LicenseScanForm
 
@@ -90,7 +89,7 @@ def license_scanview(request):
                         request,
                         "scantext/license_summary.html",
                         {
-                            "text": input_text.split("\n"),
+                            "text": input_text,
                             "detected_licenses": expressions,
                         },
                     )
@@ -99,7 +98,7 @@ def license_scanview(request):
                 request,
                 "scantext/license_summary.html",
                 {
-                    "text": input_text.split("\n"),
+                    "text": input_text,
                     "detected_licenses": expressions,
                 },
             )

From b77976d1f71ae49445d86f41ad6f779f193d0e0c Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Tue, 5 Jul 2022 18:33:49 +0530
Subject: [PATCH 21/59] Highlight and scroll down to matched text #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       | 11 +++-
 .../templates/scantext/license_summary.html   | 52 ++++++++++++++-----
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index 4906a2bdf..421a0614f 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -1,6 +1,6 @@
 {% for license in detected_licenses.licenses %}
 <div class="licenses-card">
-    <div class="card-header is-flex is-justify-content-space-between pr-2">
+    <div class="card-header is-flex is-justify-content-space-between">
         <div class="card-header-title" title="{{ license.short_name }}">
             {% if license.homepage_url %}
             <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
@@ -11,7 +11,14 @@
             </p>
             <p class="tag is-6 mx-1 is-light 
                   {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-            <p class="card-header-icon tag is-4 is-info" aria-label="more options" data-text="{{ license.matched_text }}">View</p>
+            <p class="card-header-icon"
+            data-key="{{ license.key }}"
+            data-startline="{{ license.start_line }}"
+            data-endline="{{ license.end_line }}">
+                <span class="icon">
+                    <i class="fa fa-angle-down" aria-hidden="true"></i>
+                </span>
+            </p>
         </div>
     </div>
     <div class="card-content is-hidden">
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index b19a71868..272d2df10 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -28,7 +28,6 @@ <h1 class="title is-4">License Detection Summary</h1>
         <div class="column is-two-third">
             <div class="is-flex is-justify-content-space-between mb-2">
                 <p class="main-txt-view title is-5">Input License Text</p>
-                <button class="main-txt-btn button is-info is-hidden" data-text="{{ text }}" >View Input License</button>
             </div>
             <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
         </div>
@@ -61,8 +60,6 @@ <h1 class="title is-4">License Detection Summary</h1>
     });
 
     const cards = document.querySelectorAll('.card-header-icon')
-    let mainTxt = document.querySelector('.main-txt-view')
-    const viewTxt = document.querySelector('.main-txt-btn')
 
     document.querySelectorAll('li').forEach(li => {
         li.addEventListener('click', (event) => {
@@ -75,21 +72,11 @@ <h1 class="title is-4">License Detection Summary</h1>
         })
     })
 
-    viewTxt.addEventListener('click', (event) => {
-        editor.setValue(viewTxt.getAttribute('data-text'))
-        mainTxt.innerHTML = "Input License Text"
-    })
-
     cards.forEach(card => {
       card.addEventListener('click', (event) => {
         event.preventDefault()
         content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
         lineTag= card.parentElement.querySelector('.lines').classList
-        mainTxt.innerHTML = "Matched Text"
-        if (viewTxt.classList.contains('is-hidden')) {
-            viewTxt.classList.remove('is-hidden')
-        }
-        editor.setValue(card.getAttribute('data-text'))
         if (content.contains('is-hidden')) {
             content.remove('is-hidden')
             lineTag.add('is-hidden')
@@ -98,8 +85,47 @@ <h1 class="title is-4">License Detection Summary</h1>
             lineTag.remove('is-hidden')
         }
 
+        let key = card.getAttribute('data-key')
+        let start_line = card.getAttribute('data-startline')
+        let end_line = card.getAttribute('data-endline')
+
+        setDetectedValues(key, start_line, end_line);
       })
     })
 
+    // Range(startRow, startColumn, endRow, endColumn)
+    const Range = require("ace/range").Range
+
+    let setDetectedValues = (key, start_line, end_line) => {
+      let annotations = [];
+      removeAllMarkers();
+
+        // Indexes are 0-based in ace.js
+        let start_row = start_line - 1;
+        let start_column = 0;
+        let end_row = end_line - 1;
+        let end_column = 10000;
+
+        let range = new Range(start_row, start_column, end_row, end_column);
+        editor.session.addMarker(range, "ace-marker", "fullLine");
+        annotations.push({
+          row: start_row,
+          column: 0,
+          text: key,
+          type: "info",
+        });
+
+      editor.getSession().setAnnotations(annotations);
+      editor.renderer.scrollToLine(start_row);
+    }
+
+    function removeAllMarkers() {
+      let session = editor.getSession();
+      let markers = session.getMarkers();
+        for (const [key, value] of Object.entries(markers)) {
+          session.removeMarker(value.id);
+        }
+    }
+
 </script>
 {% endblock %}

From 03339322b184b0df929b335e7b0240d9c98cde59 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Tue, 5 Jul 2022 19:05:38 +0530
Subject: [PATCH 22/59] Close all cards while opening a new one #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/templates/scantext/license_summary.html | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 272d2df10..d5061ed03 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -26,9 +26,7 @@ <h1 class="title is-4">License Detection Summary</h1>
 
     <div class="info columns mb-5 mx-1">
         <div class="column is-two-third">
-            <div class="is-flex is-justify-content-space-between mb-2">
-                <p class="main-txt-view title is-5">Input License Text</p>
-            </div>
+            <p class="title is-5">Input License Text</p>
             <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
         </div>
         <div class="column is-one-third">
@@ -78,6 +76,10 @@ <h1 class="title is-4">License Detection Summary</h1>
         content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
         lineTag= card.parentElement.querySelector('.lines').classList
         if (content.contains('is-hidden')) {
+            cards.forEach(eachcard => {
+                eachcard.parentElement.querySelector('.lines').classList.remove('is-hidden')
+                eachcard.parentElement.parentElement.parentElement.querySelector('.card-content').classList.add('is-hidden')
+            })
             content.remove('is-hidden')
             lineTag.add('is-hidden')
         } else {

From b2061458bdfcd198ca47e06811db37ca41dea390 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 7 Jul 2022 17:50:46 +0530
Subject: [PATCH 23/59] Removed details option in the view #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../scantext/includes/license_detail.html     | 48 -------------------
 .../templates/scantext/license_summary.html   | 23 +--------
 2 files changed, 2 insertions(+), 69 deletions(-)
 delete mode 100644 scantext/templates/scantext/includes/license_detail.html

diff --git a/scantext/templates/scantext/includes/license_detail.html b/scantext/templates/scantext/includes/license_detail.html
deleted file mode 100644
index 3e9280b55..000000000
--- a/scantext/templates/scantext/includes/license_detail.html
+++ /dev/null
@@ -1,48 +0,0 @@
-<!-- code for the sticky divs in new ui -->
-<div class="info columns mb-5 mx-1">
-    <div class="column is-one-third">
-        <p class="title is-5">Detected Licenses</p>
-    </div>
-    <div class="column is-two-third">
-        <p class="title is-5">Matched Text</p>
-    </div>
-</div>
-
-{% for license in detected_licenses.licenses %}
-<div class="columns mb-5 mx-1">
-    <div class="column is-one-third mb-2">
-        <div id="sticky">
-            <div class="mb-2 is-link mx-1">
-                <a class="title is-5" href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})"> {{ license.short_name }} <i class="fa fa-link is-link"></i></a>
-            </div>
-            <div class="is-flex">
-                <p class="tag is-6 mr-2 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                <p class="tag is-6 mr-2 is-light is-primary">{{ license.category }}</p>
-                <a class="tag is-6 mr-2 is-light is-link" href="{{ license.reference_url }}">ref</a>
-                <p class="tag is-6 mr-2 is-light is-info">
-                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                </p>
-            </div>
-        </div>
-    </div>
-    <div class="column is-two-third">
-        <article class="message is-info">
-            <div class="message-header is-hidden"></div>
-            <div class="message-body" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
-                {{ license.matched_text }}
-            </div>
-        </article>
-    </div>
-</div>
-<hr class="mx-1 mt-0">
-
-<style type="text/css">
-    #sticky {
-        position: sticky;
-        position: -webkit-sticky;
-        margin-top: 1vh;
-        top:  4vh;
-    }
-
-</style>
-{% endfor %}
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index d5061ed03..9ce777d53 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -13,18 +13,11 @@ <h1 class="title is-4">License Detection Summary</h1>
         </div>
     </section>
 
-    <!-- still under implementation -->
-    <div class="tabs">
-      <ul>
-        <li class="is-active"><a>Summary</a></li>
-        <li><a>Details</a></li>
-      </ul>
-    </div>
-
+    <hr class="mx-1">
     {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
-    <div class="info columns mb-5 mx-1">
+    <div class="columns mb-5 mx-1">
         <div class="column is-two-third">
             <p class="title is-5">Input License Text</p>
             <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
@@ -37,7 +30,6 @@ <h1 class="title is-4">License Detection Summary</h1>
         </div>
     </div>
 
-    {% include 'scantext/includes/license_detail.html' with detected_licenses=detected_licenses %}
 </div>
 {% endblock %}
 
@@ -59,17 +51,6 @@ <h1 class="title is-4">License Detection Summary</h1>
 
     const cards = document.querySelectorAll('.card-header-icon')
 
-    document.querySelectorAll('li').forEach(li => {
-        li.addEventListener('click', (event) => {
-         if (!li.classList.contains('is-active')) {
-            document.querySelectorAll('li').forEach(listItem => {
-                listItem.classList.remove('is-active')
-            })
-            li.classList.add('is-active')
-         }
-        })
-    })
-
     cards.forEach(card => {
       card.addEventListener('click', (event) => {
         event.preventDefault()

From b53e45cd3b8a5415eed95859cadb2b654b6c6ad4 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sat, 16 Jul 2022 16:27:49 +0530
Subject: [PATCH 24/59] Restore details tab with matched text #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       | 127 ++++++++++--------
 .../includes/license_summary_detail.html      |  48 +++++++
 .../templates/scantext/license_summary.html   |  51 ++++---
 3 files changed, 150 insertions(+), 76 deletions(-)
 create mode 100644 scantext/templates/scantext/includes/license_summary_detail.html

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index 421a0614f..cbeb49b84 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -1,67 +1,76 @@
-{% for license in detected_licenses.licenses %}
-<div class="licenses-card">
-    <div class="card-header is-flex is-justify-content-space-between">
-        <div class="card-header-title" title="{{ license.short_name }}">
-            {% if license.homepage_url %}
-            <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
-        </div>
-        <div class="is-flex is-justify-content-row is-align-items-center">
-            <p class="lines tag is-6 mx-1 is-light is-info">
-                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-            </p>
-            <p class="tag is-6 mx-1 is-light 
+<div class="columns mb-5 mx-1">
+    <div class="column is-two-third">
+        <p class="title is-5">Input License Text</p>
+        <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
+    </div>
+    <div class="column is-one-third">
+        <p class="title is-5">Detected Licenses</p>
+        <div class="card">
+            {% for license in detected_licenses.licenses %}
+            <div class="licenses-card">
+                <div class="card-header is-flex is-justify-content-space-between">
+                    <div class="card-header-title" title="{{ license.short_name }}">
+                        {% if license.homepage_url %}
+                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
+                    </div>
+                    <div class="is-flex is-justify-content-row is-align-items-center">
+                        <p class="lines tag is-6 mx-1 is-light is-info">
+                            {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                        </p>
+                        <p class="tag is-6 mx-1 is-light 
                   {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-            <p class="card-header-icon"
-            data-key="{{ license.key }}"
-            data-startline="{{ license.start_line }}"
-            data-endline="{{ license.end_line }}">
-                <span class="icon">
+                        <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
+                            <span class="icon">
                     <i class="fa fa-angle-down" aria-hidden="true"></i>
                 </span>
-            </p>
-        </div>
-    </div>
-    <div class="card-content is-hidden">
-        <div class="content">
-            <div class="is-flex is-justify-content-space-between">
-                <div>
-                    <p class="title is-5">Details</p>
+                        </p>
+                    </div>
                 </div>
-                <div>
-                    <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
+                <div class="card-content is-hidden">
+                    <div class="content">
+                        <div class="is-flex is-justify-content-space-between">
+                            <div>
+                                <p class="title is-5">Details</p>
+                            </div>
+                            <div>
+                                <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
+                            </div>
+                        </div>
+                        <div class="table-container mt-3">
+                            <table class="table is-striped is-narrow is-hoverable is-fullwidth">
+                                <tr>
+                                    <td><strong>Name</strong></td>
+                                    <td>{{ license.name }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Score</strong></td>
+                                    <td>{{ license.score }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Owner</strong></td>
+                                    <td>{{ license.owner }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Category</strong></td>
+                                    <td>
+                                        <p class="tag is-4 is-light is-primary">{{ license.category }}</p>
+                                    </td>
+                                </tr>
+                                <tr>
+                                    <td><strong>SPDX Key</strong></td>
+                                    <td>{{ license.spdx_license_key }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Reference</strong></td>
+                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                </tr>
+                            </table>
+                        </div>
+                    </div>
                 </div>
             </div>
-            <div class="table-container mt-3">
-              <table class="table is-striped is-narrow is-hoverable is-fullwidth">
-                <tr>
-                    <td><strong>Name</strong></td>
-                    <td>{{ license.name }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Score</strong></td>
-                    <td>{{ license.score }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Owner</strong></td>
-                    <td>{{ license.owner }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Category</strong></td>
-                    <td>
-                        <p class="tag is-4 is-light is-primary">{{ license.category }}</p>
-                    </td>
-                </tr>
-                <tr>
-                    <td><strong>SPDX Key</strong></td>
-                    <td>{{ license.spdx_license_key }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Reference</strong></td>
-                    <td>{{ license.reference_url }}</td>
-                </tr>
-              </table>
-            </div>
+            {% endfor %}
+
         </div>
     </div>
-</div>
-{% endfor %}
+</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
new file mode 100644
index 000000000..57cddcfd9
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -0,0 +1,48 @@
+<!-- code for the sticky divs in new ui -->
+<div id="details" class="columns mb-5 mx-1">
+    <div class="column is-one-third">
+        <p class="title is-5">Detected Licenses</p>
+    </div>
+    <div class="column is-two-third">
+        <p class="title is-5">Matched Text</p>
+    </div>
+</div>
+
+{% for license in detected_licenses.licenses %}
+<div class="columns mb-5 mx-1">
+    <div class="column is-one-third mb-2">
+        <div id="sticky">
+            <div class="mb-2 is-link mx-1">
+                <a class="title is-5" href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})"> {{ license.short_name }} <i class="fa fa-link is-link"></i></a>
+            </div>
+            <div class="is-flex">
+                <p class="tag is-6 mr-2 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                <p class="tag is-6 mr-2 is-light is-primary">{{ license.category }}</p>
+                <a class="tag is-6 mr-2 is-light is-link" href="{{ license.reference_url }}">ref</a>
+                <p class="tag is-6 mr-2 is-light is-info">
+                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                </p>
+            </div>
+        </div>
+    </div>
+    <div class="column is-two-third">
+        <article class="message is-small">
+            <div class="message-header is-hidden"></div>
+            <div class="message-body" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
+                {{ license.matched_text }}
+            </div>
+        </article>
+    </div>
+</div>
+<hr class="mx-1 mt-0">
+
+<style type="text/css">
+    #sticky {
+        position: sticky;
+        position: -webkit-sticky;
+        margin-top: 1vh;
+        top:  4vh;
+    }
+
+</style>
+{% endfor %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 9ce777d53..c5dfc5807 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -13,23 +13,24 @@ <h1 class="title is-4">License Detection Summary</h1>
         </div>
     </section>
 
-    <hr class="mx-1">
+    <!-- still under implementation -->
+    <div class="tabs">
+        <ul>
+            <li class="is-active"><a>Summary</a></li>
+            <li><a>Details</a></li>
+        </ul>
+    </div>
+
     {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
-    <div class="columns mb-5 mx-1">
-        <div class="column is-two-third">
-            <p class="title is-5">Input License Text</p>
-            <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
-        </div>
-        <div class="column is-one-third">
-            <p class="title is-5">Detected Licenses</p>
-            <div class="card">
-                {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
-            </div>
-        </div>
-    </div>
+    <section class="tab-content">
+        {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
+    </section>
 
+    <section class="tab-content is-hidden">
+        {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
+    </section>
 </div>
 {% endblock %}
 
@@ -39,7 +40,7 @@ <h1 class="title is-4">License Detection Summary</h1>
     let editor = ace.edit("editor", {
         mode: "ace/mode/text",
         autoScrollEditorIntoView: true,
-        wrap: false,
+        wrap: true,
         readOnly: true,
         showPrintMargin: false,
         highlightActiveLine: false,
@@ -50,6 +51,23 @@ <h1 class="title is-4">License Detection Summary</h1>
     });
 
     const cards = document.querySelectorAll('.card-header-icon')
+    const tabContent = document.querySelectorAll('.tab-content')
+
+    document.querySelectorAll('li').forEach((li, index) => {
+        li.addEventListener('click', (event) => {
+         if (!li.classList.contains('is-active')) {
+            document.querySelectorAll('li').forEach(listItem => {
+                listItem.classList.remove('is-active')
+            })
+            li.classList.add('is-active')
+         }
+
+         tabContent.forEach(section => {
+            section.classList.add('is-hidden')
+         })
+         tabContent[index].classList.remove('is-hidden')
+        })
+    })
 
     cards.forEach(card => {
       card.addEventListener('click', (event) => {
@@ -90,7 +108,7 @@ <h1 class="title is-4">License Detection Summary</h1>
         let end_column = 10000;
 
         let range = new Range(start_row, start_column, end_row, end_column);
-        editor.session.addMarker(range, "ace-marker", "fullLine");
+        editor.session.addMarker(range, "ace-marker", "line");
         annotations.push({
           row: start_row,
           column: 0,
@@ -109,6 +127,5 @@ <h1 class="title is-4">License Detection Summary</h1>
           session.removeMarker(value.id);
         }
     }
-
 </script>
-{% endblock %}
+{% endblock %}
\ No newline at end of file

From 129306658a22e45cbacfeb9a2d6903111a2f1b8f Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 28 Jul 2022 20:09:39 +0530
Subject: [PATCH 25/59] Highlight text with colors #450

* Philippe's code is working only for one match

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       |   6 +-
 .../includes/license_summary_detail.html      |   2 +-
 .../templates/scantext/license_summary.html   |  18 +-
 scantext/views.py                             | 217 +++++++++++++-----
 4 files changed, 179 insertions(+), 64 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index cbeb49b84..88f63587b 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -52,9 +52,7 @@
                                 </tr>
                                 <tr>
                                     <td><strong>Category</strong></td>
-                                    <td>
-                                        <p class="tag is-4 is-light is-primary">{{ license.category }}</p>
-                                    </td>
+                                    <td>{{ license.category }}</td>
                                 </tr>
                                 <tr>
                                     <td><strong>SPDX Key</strong></td>
@@ -73,4 +71,4 @@
 
         </div>
     </div>
-</div>
\ No newline at end of file
+</div>
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 57cddcfd9..f453940bd 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -29,7 +29,7 @@
         <article class="message is-small">
             <div class="message-header is-hidden"></div>
             <div class="message-body" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
-                {{ license.matched_text }}
+                {{ license.matched_text|safe }}
             </div>
         </article>
     </div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index c5dfc5807..176212fad 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -18,6 +18,8 @@ <h1 class="title is-4">License Detection Summary</h1>
         <ul>
             <li class="is-active"><a>Summary</a></li>
             <li><a>Details</a></li>
+            <li><a>License Matches</a></li>
+            <li><a>License Matches (whole)</a></li>
         </ul>
     </div>
 
@@ -31,6 +33,20 @@ <h1 class="title is-4">License Detection Summary</h1>
     <section class="tab-content is-hidden">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
     </section>
+
+    <section class="tab-content is-hidden">
+        {% for text in detected_licenses.complete_text_in_array %}
+        <div class="box" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
+            {{ text|safe }}
+        </div>
+        {% endfor %}
+    </section>
+
+    <section class="tab-content is-hidden">
+        <div class="box" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
+            {{ detected_licenses.complete_text|safe }}
+        </div>
+    </section>
 </div>
 {% endblock %}
 
@@ -128,4 +144,4 @@ <h1 class="title is-4">License Detection Summary</h1>
         }
     }
 </script>
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/scantext/views.py b/scantext/views.py
index 026d43b8f..2c2df9a09 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -21,14 +21,18 @@
 # Visit https://github.com/nexB/scancode.io for support and download.
 
 import sys
+import attr
 import tempfile
 
 from django.conf import settings
 from django.contrib import messages
 from django.shortcuts import render
 
+from licensedcode.stopwords import STOPWORDS
+from licensedcode.match import tokenize_matched_text
 from scantext.forms import LicenseScanForm
 
+TRACE_HIGHLIGHTED_TEXT = True
 SCANCODE_REPO_URL = "https://github.com/nexB/scancode-toolkit"
 SCANCODE_BASE_URL = SCANCODE_REPO_URL + "/tree/develop/src/licensedcode/data/licenses"
 SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
@@ -39,62 +43,71 @@
 
 
 def license_scanview(request):
-    form = LicenseScanForm()
-    if request.method == "POST":
-        form = LicenseScanForm(request.POST, request.FILES)
-        if form.is_valid():
-            input_text = form.cleaned_data["input_text"]
-            input_file = request.FILES.get("input_file", False)
-            if not len(input_text) and not input_file:
-                message = "Please provide some text or a text file to scan."
-                messages.warning(request, message)
-                return render(
-                    request,
-                    "scantext/license_scan_form.html",
-                    {
-                        "form": LicenseScanForm(),
-                    },
-                )
-
-            # The flush in tempfile is required to ensure that the content is
-            # written to the disk before it's read by get_licenses function
-            if len(input_text):
-                with tempfile.NamedTemporaryFile(mode="w") as temp_file:
-                    temp_file.write(input_text)
-                    temp_file.flush()
-                    expressions = get_licenses(location=temp_file.name)
-            elif input_file:
-                try:
-                    with tempfile.NamedTemporaryFile(mode="w") as temp_file:
-                        input_text = str(input_file.read(), "UTF-8")
-                        temp_file.write(input_text)
-                        temp_file.flush()
-                        expressions = get_licenses(location=temp_file.name)
-                except UnicodeDecodeError:
-                    message = "Please upload a valid text file."
-                    messages.warning(request, message)
-                    return render(
-                        request,
-                        "scantext/license_scan_form.html",
-                        {
-                            "form": LicenseScanForm(),
-                        },
-                    )
-
-            if not len(expressions["licenses"]):
-                if not len(expressions["license_expressions"]):
-                    message = "Couldn't detect any license from the provided input."
-                    messages.info(request, message)
-                    return render(
-                        request,
-                        "scantext/license_summary.html",
-                        {
-                            "text": input_text,
-                            "detected_licenses": expressions,
-                        },
-                    )
+    if request.method != "POST":
+        return render(
+            request, "scantext/license_scan_form.html", {"form": LicenseScanForm()}
+        )
+
+    form = LicenseScanForm(request.POST, request.FILES)
+    if not form.is_valid():
+        return render(
+            request, "scantext/license_scan_form.html", {"form": LicenseScanForm()}
+        )
+
+    input_text = form.cleaned_data["input_text"]
+    input_file = request.FILES.get("input_file", False)
+
+    if input_text and input_file:
+        message = "Provide text or a text file but not both."
+        messages.warning(request, message)
+        return render(
+            request,
+            "scantext/license_scan_form.html",
+            {
+                "form": LicenseScanForm(),
+            },
+        )
+
+    if not input_text and not input_file:
+        message = "Provide text or a text file to scan."
+        messages.warning(request, message)
+        return render(
+            request,
+            "scantext/license_scan_form.html",
+            {
+                "form": LicenseScanForm(),
+            },
+        )
 
+    # The flush in tempfile is required to ensure that the content is
+    # written to the disk before it's read by get_licenses function
+    if input_text:
+        with tempfile.NamedTemporaryFile(mode="w") as temp_file:
+            temp_file.write(input_text)
+            temp_file.flush()
+            expressions = get_licenses(location=temp_file.name)
+    elif input_file:
+        try:
+            with tempfile.NamedTemporaryFile(mode="w") as temp_file:
+                input_text = str(input_file.read(), "UTF-8")
+                temp_file.write(input_text)
+                temp_file.flush()
+                expressions = get_licenses(location=temp_file.name)
+        except UnicodeDecodeError:
+            message = "Please upload a valid text file."
+            messages.warning(request, message)
             return render(
+                request,
+                "scantext/license_scan_form.html",
+                {
+                    "form": LicenseScanForm(),
+                },
+            )
+
+    if not expressions["licenses"] and not expressions["license_expressions"]:
+        message = "Couldn't detect any license from the provided input."
+        messages.info(request, message)
+        return render(
                 request,
                 "scantext/license_summary.html",
                 {
@@ -102,7 +115,18 @@ def license_scanview(request):
                     "detected_licenses": expressions,
                 },
             )
-    return render(request, "scantext/license_scan_form.html", {"form": form})
+    # if TRACE_HIGHLIGHTED_TEXT:
+    #     from pprint import pprint
+    #     pprint(expressions, indent=4)
+
+    return render(
+        request,
+        "scantext/license_summary.html",
+        {
+            "text": input_text,
+            "detected_licenses": expressions,
+        },
+    )
 
 
 def get_licenses(
@@ -123,10 +147,10 @@ def get_licenses(
     score lower than `minimum_score` are not returned.
     By Default ``unknown_licenses`` is set to True to detect unknown licenses.
     """
-    from licensedcode import cache
+    from licensedcode.cache import get_index
     from licensedcode.spans import Span
 
-    idx = cache.get_index()
+    idx = get_index()
 
     detected_licenses = []
     detected_expressions = []
@@ -141,6 +165,8 @@ def get_licenses(
 
     qspans = []
     match = None
+    complete_text = ''
+    complete_text_in_array = []
     for match in matches:
         qspans.append(match.qspan)
 
@@ -153,6 +179,18 @@ def get_licenses(
             )
         )
 
+        complete_text += get_highlighted_lines(
+                match=match,
+                stopwords=STOPWORDS,
+                trace=TRACE_HIGHLIGHTED_TEXT,
+            )
+
+        complete_text_in_array.append(get_highlighted_lines(
+                match=match,
+                stopwords=STOPWORDS,
+                trace=TRACE_HIGHLIGHTED_TEXT,
+            ))
+
     percentage_of_license_text = 0
     if match:
         # we need at least one match to compute a license_coverage
@@ -167,6 +205,8 @@ def get_licenses(
             ("licenses", detected_licenses),
             ("license_expressions", detected_expressions),
             ("percentage_of_license_text", percentage_of_license_text),
+            ("complete_text_in_array", complete_text_in_array),
+            ("complete_text", complete_text)
         ]
     )
 
@@ -184,7 +224,9 @@ def _licenses_data_from_match(
     licenses = cache.get_licenses_db()
 
     # Returned matched_text will also include the text detected
-    matched_text = match.matched_text(whole_lines=False, highlight=True)
+    matched_text = match.matched_text(whole_lines=False, highlight=True,
+        highlight_matched='<matched>{}</matched>',
+        highlight_not_matched='<notmatched>{}</notmatched>',)
 
     detected_licenses = []
     for license_key in match.rule.license_keys():
@@ -239,3 +281,62 @@ def _licenses_data_from_match(
         matched_rule["rule_relevance"] = match.rule.relevance
 
     return detected_licenses
+
+def logger_debug(*args): pass
+
+def get_highlighted_lines(
+    match,
+    stopwords=STOPWORDS,
+    trace=TRACE_HIGHLIGHTED_TEXT,
+):
+    """
+    Yield highlighted text lines (with line returns) for the whole of the matched and unmatched text of a ``query``.
+    """
+    query = match.query
+    tokens = tokenize_matched_text(
+        location=query.location,
+        query_string=query.query_string,
+        dictionary=query.idx.dictionary,
+        start_line=match.query.start_line,
+        _cache={},
+    )
+    tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan)
+
+    header = '''<style>
+      .license-match.log {color: #f1f1f1; background-color: #222; font-family: monospace;}
+      .license-match.wrap {white-space: pre-wrap;}
+      .not-matched {color:#ac0000;}
+      .matched {color:#00ac00;}
+    </style>
+    <div class="license-match">'''
+    footer = '''</div>'''
+
+    body = ''
+    highlight_matched = '<span class="matched">{}</span>'
+    highlight_not_matched = '<span class="not-matched">{}</span>'
+    for token in tokens:
+        val = token.value
+        if token.is_text and val.lower() not in stopwords:
+            if token.is_matched:
+                body += highlight_matched.format(val)
+            else:
+                body += highlight_not_matched.format(val)
+        else:
+            # we do not highlight punctuation and stopwords.
+            body += highlight_not_matched.format(val)
+
+    return header + body + footer
+
+
+def tag_matched_tokens(tokens, match_qspan):
+    """
+    Yield Tokens from a ``tokens`` iterable of Token objects.
+    Known matched tokens are tagged as "is_matched=True" if they are matched.
+    """
+    for tok in tokens:
+        # tagged known matched tokens (useful for highlighting)
+        if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
+            tok = attr.evolve(tok, is_matched=True)
+        yield tok
+
+

From a6b34de9304930c072bf475b61c7f218f1421b55 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 28 Jul 2022 21:02:45 +0530
Subject: [PATCH 26/59] Fix failing test for `make valid` #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 61 ++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 2c2df9a09..443fcebd3 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -21,15 +21,16 @@
 # Visit https://github.com/nexB/scancode.io for support and download.
 
 import sys
-import attr
 import tempfile
 
 from django.conf import settings
 from django.contrib import messages
 from django.shortcuts import render
 
-from licensedcode.stopwords import STOPWORDS
+import attr
 from licensedcode.match import tokenize_matched_text
+from licensedcode.stopwords import STOPWORDS
+
 from scantext.forms import LicenseScanForm
 
 TRACE_HIGHLIGHTED_TEXT = True
@@ -108,13 +109,13 @@ def license_scanview(request):
         message = "Couldn't detect any license from the provided input."
         messages.info(request, message)
         return render(
-                request,
-                "scantext/license_summary.html",
-                {
-                    "text": input_text,
-                    "detected_licenses": expressions,
-                },
-            )
+            request,
+            "scantext/license_summary.html",
+            {
+                "text": input_text,
+                "detected_licenses": expressions,
+            },
+        )
     # if TRACE_HIGHLIGHTED_TEXT:
     #     from pprint import pprint
     #     pprint(expressions, indent=4)
@@ -165,7 +166,7 @@ def get_licenses(
 
     qspans = []
     match = None
-    complete_text = ''
+    complete_text = ""
     complete_text_in_array = []
     for match in matches:
         qspans.append(match.qspan)
@@ -180,16 +181,18 @@ def get_licenses(
         )
 
         complete_text += get_highlighted_lines(
-                match=match,
-                stopwords=STOPWORDS,
-                trace=TRACE_HIGHLIGHTED_TEXT,
-            )
+            match=match,
+            stopwords=STOPWORDS,
+            trace=TRACE_HIGHLIGHTED_TEXT,
+        )
 
-        complete_text_in_array.append(get_highlighted_lines(
+        complete_text_in_array.append(
+            get_highlighted_lines(
                 match=match,
                 stopwords=STOPWORDS,
                 trace=TRACE_HIGHLIGHTED_TEXT,
-            ))
+            )
+        )
 
     percentage_of_license_text = 0
     if match:
@@ -206,7 +209,7 @@ def get_licenses(
             ("license_expressions", detected_expressions),
             ("percentage_of_license_text", percentage_of_license_text),
             ("complete_text_in_array", complete_text_in_array),
-            ("complete_text", complete_text)
+            ("complete_text", complete_text),
         ]
     )
 
@@ -224,9 +227,12 @@ def _licenses_data_from_match(
     licenses = cache.get_licenses_db()
 
     # Returned matched_text will also include the text detected
-    matched_text = match.matched_text(whole_lines=False, highlight=True,
-        highlight_matched='<matched>{}</matched>',
-        highlight_not_matched='<notmatched>{}</notmatched>',)
+    matched_text = match.matched_text(
+        whole_lines=False,
+        highlight=True,
+        highlight_matched="<matched>{}</matched>",
+        highlight_not_matched="<notmatched>{}</notmatched>",
+    )
 
     detected_licenses = []
     for license_key in match.rule.license_keys():
@@ -282,7 +288,10 @@ def _licenses_data_from_match(
 
     return detected_licenses
 
-def logger_debug(*args): pass
+
+def logger_debug(*args):
+    pass
+
 
 def get_highlighted_lines(
     match,
@@ -302,16 +311,16 @@ def get_highlighted_lines(
     )
     tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan)
 
-    header = '''<style>
+    header = """<style>
       .license-match.log {color: #f1f1f1; background-color: #222; font-family: monospace;}
       .license-match.wrap {white-space: pre-wrap;}
       .not-matched {color:#ac0000;}
       .matched {color:#00ac00;}
     </style>
-    <div class="license-match">'''
-    footer = '''</div>'''
+    <div class="license-match">"""
+    footer = """</div>"""
 
-    body = ''
+    body = ""
     highlight_matched = '<span class="matched">{}</span>'
     highlight_not_matched = '<span class="not-matched">{}</span>'
     for token in tokens:
@@ -338,5 +347,3 @@ def tag_matched_tokens(tokens, match_qspan):
         if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
             tok = attr.evolve(tok, is_matched=True)
         yield tok
-
-

From 1bdf9d19094f27887110cd1aedb5bb341e96b6ac Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 28 Jul 2022 21:14:14 +0530
Subject: [PATCH 27/59] Fix failing test for `make valid` #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 443fcebd3..2e1ffe744 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -299,7 +299,8 @@ def get_highlighted_lines(
     trace=TRACE_HIGHLIGHTED_TEXT,
 ):
     """
-    Yield highlighted text lines (with line returns) for the whole of the matched and unmatched text of a ``query``.
+    Yield highlighted text lines (with line returns) for the whole
+    of the matched and unmatched text of a ``query``.
     """
     query = match.query
     tokens = tokenize_matched_text(
@@ -312,12 +313,26 @@ def get_highlighted_lines(
     tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan)
 
     header = """<style>
-      .license-match.log {color: #f1f1f1; background-color: #222; font-family: monospace;}
-      .license-match.wrap {white-space: pre-wrap;}
-      .not-matched {color:#ac0000;}
-      .matched {color:#00ac00;}
+    .license-match.log {
+        color: #f1f1f1;
+        background-color: #222;
+        font-family: monospace;
+    }
+
+    .license-match.wrap {
+        white-space: pre-wrap;
+    }
+
+    .not-matched {
+        color: #ac0000;
+    }
+
+    .matched {
+        color: #00ac00;
+    }
     </style>
-    <div class="license-match">"""
+    <div class="license-match">
+    """
     footer = """</div>"""
 
     body = ""

From 67462c5a957ff87813268dcbf109ccd2edd99c96 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sun, 7 Aug 2022 12:19:19 +0530
Subject: [PATCH 28/59] Testing out new UI to match the projects page #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_detail.html      | 87 ++++++++++---------
 .../templates/scantext/license_summary.html   | 30 ++-----
 scantext/views.py                             |  8 --
 3 files changed, 52 insertions(+), 73 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index f453940bd..25990c3a5 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,48 +1,51 @@
-<!-- code for the sticky divs in new ui -->
-<div id="details" class="columns mb-5 mx-1">
-    <div class="column is-one-third">
-        <p class="title is-5">Detected Licenses</p>
-    </div>
-    <div class="column is-two-third">
-        <p class="title is-5">Matched Text</p>
-    </div>
-</div>
-
-{% for license in detected_licenses.licenses %}
 <div class="columns mb-5 mx-1">
-    <div class="column is-one-third mb-2">
-        <div id="sticky">
-            <div class="mb-2 is-link mx-1">
-                <a class="title is-5" href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})"> {{ license.short_name }} <i class="fa fa-link is-link"></i></a>
-            </div>
-            <div class="is-flex">
-                <p class="tag is-6 mr-2 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                <p class="tag is-6 mr-2 is-light is-primary">{{ license.category }}</p>
-                <a class="tag is-6 mr-2 is-light is-link" href="{{ license.reference_url }}">ref</a>
-                <p class="tag is-6 mr-2 is-light is-info">
-                {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                </p>
-            </div>
-        </div>
-    </div>
     <div class="column is-two-third">
-        <article class="message is-small">
-            <div class="message-header is-hidden"></div>
-            <div class="message-body" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
-                {{ license.matched_text|safe }}
+        <p class="title is-5">Input License Text</p>
+        {% for text in detected_licenses.complete_text_in_array %}
+        <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
+            {{ text|safe }}
+        </section>
+        {% endfor %}
+    </div>
+    <div class="column is-one-third">
+        <article class="panel is-info">
+            <div class="panel-heading is-flex is-justify-content-space-between is-align-items-center py-0 px-3">
+                <p>Detected Licenses</p>
+                <div>
+                    <button class="button px-2 previous-btn is-info"><i class="fas fa-arrow-up"></i></button>
+                    <button class="button px-2 next-btn is-info"><i class="fas fa-arrow-down"></i></button>
+                </div>
             </div>
+            {% for license in detected_licenses.licenses %}
+            <table class="license-table table is-fullwidth">
+                <tbody>
+                    <tr>
+                        <td><strong>Name</strong></td>
+                        <td>{{ license.name }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Score</strong></td>
+                        <td>{{ license.score }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Owner</strong></td>
+                        <td>{{ license.owner }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Category</strong></td>
+                        <td>{{ license.category }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>SPDX Key</strong></td>
+                        <td>{{ license.spdx_license_key }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Reference</strong></td>
+                        <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                    </tr>
+                </tbody>
+            </table>
+            {% endfor %}
         </article>
     </div>
 </div>
-<hr class="mx-1 mt-0">
-
-<style type="text/css">
-    #sticky {
-        position: sticky;
-        position: -webkit-sticky;
-        margin-top: 1vh;
-        top:  4vh;
-    }
-
-</style>
-{% endfor %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 176212fad..f95a7a96f 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -15,38 +15,22 @@ <h1 class="title is-4">License Detection Summary</h1>
 
     <!-- still under implementation -->
     <div class="tabs">
-        <ul>
-            <li class="is-active"><a>Summary</a></li>
-            <li><a>Details</a></li>
-            <li><a>License Matches</a></li>
-            <li><a>License Matches (whole)</a></li>
+        <ul class="nav">
+            <li><a>Summary In Ace</a></li>
+            <li class="is-active"><a>Summary with Highlighted Text</a></li>
         </ul>
     </div>
 
     {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
-    <section class="tab-content">
+    <section class="tab-container is-hidden">
         {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
     </section>
 
-    <section class="tab-content is-hidden">
+    <section class="tab-container">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
     </section>
-
-    <section class="tab-content is-hidden">
-        {% for text in detected_licenses.complete_text_in_array %}
-        <div class="box" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
-            {{ text|safe }}
-        </div>
-        {% endfor %}
-    </section>
-
-    <section class="tab-content is-hidden">
-        <div class="box" style="white-space: pre-wrap; max-height: 30vh; overflow: scroll;">
-            {{ detected_licenses.complete_text|safe }}
-        </div>
-    </section>
 </div>
 {% endblock %}
 
@@ -67,9 +51,9 @@ <h1 class="title is-4">License Detection Summary</h1>
     });
 
     const cards = document.querySelectorAll('.card-header-icon')
-    const tabContent = document.querySelectorAll('.tab-content')
+    const tabContent = document.querySelectorAll('.tab-container')
 
-    document.querySelectorAll('li').forEach((li, index) => {
+    document.querySelectorAll('.nav li').forEach((li, index) => {
         li.addEventListener('click', (event) => {
          if (!li.classList.contains('is-active')) {
             document.querySelectorAll('li').forEach(listItem => {
diff --git a/scantext/views.py b/scantext/views.py
index 2e1ffe744..5dcb703d2 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -166,7 +166,6 @@ def get_licenses(
 
     qspans = []
     match = None
-    complete_text = ""
     complete_text_in_array = []
     for match in matches:
         qspans.append(match.qspan)
@@ -180,12 +179,6 @@ def get_licenses(
             )
         )
 
-        complete_text += get_highlighted_lines(
-            match=match,
-            stopwords=STOPWORDS,
-            trace=TRACE_HIGHLIGHTED_TEXT,
-        )
-
         complete_text_in_array.append(
             get_highlighted_lines(
                 match=match,
@@ -209,7 +202,6 @@ def get_licenses(
             ("license_expressions", detected_expressions),
             ("percentage_of_license_text", percentage_of_license_text),
             ("complete_text_in_array", complete_text_in_array),
-            ("complete_text", complete_text),
         ]
     )
 

From 0f675ffaeef4d43a8bd59185f3c9de891943d06a Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Tue, 9 Aug 2022 15:25:15 +0530
Subject: [PATCH 29/59] Add match_text tests to scantext #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        |  501 ++++++
 .../tests/data/matched_text/binary_text/gosu  |  Bin 0 -> 1712 bytes
 .../binary_text/rules/gpl-3.0_rdesc_1.RULE    |    1 +
 .../binary_text/rules/gpl-3.0_rdesc_1.yml     |    2 +
 .../tests/data/matched_text/ffmpeg/ffmpeg     |  Bin 0 -> 6136 bytes
 .../tests/data/matched_text/ffmpeg/ffmpeg.exe |  Bin 0 -> 16136 bytes
 .../data/matched_text/ffmpeg/libavsample.lib  |  Bin 0 -> 1783 bytes
 .../index/rules/gpl-2.0_bare_single_word.RULE |    1 +
 .../index/rules/gpl-2.0_bare_single_word.yml  |    3 +
 .../index/rules/gpl-2.0_or_apache-2.0_2.RULE  |    3 +
 .../index/rules/gpl-2.0_or_apache-2.0_2.yml   |    6 +
 .../matched_text/index/rules/mit_101.RULE     |    2 +
 .../data/matched_text/index/rules/mit_101.yml |    5 +
 scantext/tests/data/matched_text/query.txt    |    4 +
 .../tests/data/matched_text/spdx/query.txt    |   12 +
 .../tokenize_matched_text_query.txt           |    1 +
 .../data/matched_text/turkish_unicode/query   |   20 +
 .../turkish_unicode/rules/rule1.RULE          |    1 +
 .../turkish_unicode/rules/rule1.yml           |    1 +
 .../turkish_unicode/rules/rule2.RULE          |    2 +
 .../turkish_unicode/rules/rule2.yml           |    1 +
 .../turkish_unicode/rules/rule3.RULE          |    1 +
 .../turkish_unicode/rules/rule3.yml           |    1 +
 .../turkish_unicode/rules/rule4.RULE          |    1 +
 .../turkish_unicode/rules/rule4.yml           |    1 +
 .../data/matched_text/unicode_text/main3.js   |    1 +
 scantext/tests/test.py                        |   25 -
 scantext/tests/test_match_text.py             | 1495 +++++++++++++++++
 28 files changed, 2066 insertions(+), 25 deletions(-)
 create mode 100644 scantext/match_text.py
 create mode 100644 scantext/tests/data/matched_text/binary_text/gosu
 create mode 100644 scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
 create mode 100644 scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
 create mode 100644 scantext/tests/data/matched_text/ffmpeg/ffmpeg
 create mode 100644 scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe
 create mode 100644 scantext/tests/data/matched_text/ffmpeg/libavsample.lib
 create mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
 create mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
 create mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
 create mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
 create mode 100644 scantext/tests/data/matched_text/index/rules/mit_101.RULE
 create mode 100644 scantext/tests/data/matched_text/index/rules/mit_101.yml
 create mode 100644 scantext/tests/data/matched_text/query.txt
 create mode 100644 scantext/tests/data/matched_text/spdx/query.txt
 create mode 100644 scantext/tests/data/matched_text/tokenize_matched_text_query.txt
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/query
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
 create mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
 create mode 100644 scantext/tests/data/matched_text/unicode_text/main3.js
 delete mode 100644 scantext/tests/test.py
 create mode 100644 scantext/tests/test_match_text.py

diff --git a/scantext/match_text.py b/scantext/match_text.py
new file mode 100644
index 000000000..e43a60ca2
--- /dev/null
+++ b/scantext/match_text.py
@@ -0,0 +1,501 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/scancode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from enum import IntEnum
+from itertools import groupby
+
+import attr
+from attr import validators
+from licensedcode import query
+from licensedcode.spans import Span
+from licensedcode.stopwords import STOPWORDS
+from licensedcode.tokenize import index_tokenizer
+from licensedcode.tokenize import matched_query_text_tokenizer
+
+TRACE = False
+TRACE_MATCHED_TEXT = False
+TRACE_MATCHED_TEXT_DETAILS = False
+
+
+def logger_debug(*args):
+    pass
+
+
+if TRACE or TRACE_MATCHED_TEXT or TRACE_MATCHED_TEXT_DETAILS:
+
+    use_print = True
+    if use_print:
+        prn = print
+    else:
+        import logging
+        import sys
+
+        logger = logging.getLogger(__name__)
+        # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
+        logging.basicConfig(stream=sys.stdout)
+        logger.setLevel(logging.DEBUG)
+        prn = logger.debug
+
+    def logger_debug(*args):
+        return prn(" ".join(isinstance(a, str) and a or repr(a) for a in args))
+
+    def _debug_print_matched_query_text(match, extras=5):
+        """
+        Print a matched query text including `extras` tokens before and after
+        the match. Used for debugging license matches.
+        """
+        # Create a fake new match with extra tokens before and after
+        new_match = match.combine(match)
+        new_qstart = max([0, match.qstart - extras])
+        new_qend = min([match.qend + extras, len(match.query.tokens)])
+        new_qspan = Span(new_qstart, new_qend)
+        new_match.qspan = new_qspan
+
+        logger_debug(new_match)
+        logger_debug(" MATCHED QUERY TEXT with extras")
+        qt = new_match.matched_text(whole_lines=False)
+        logger_debug(qt)
+
+
+@attr.s(slots=True, frozen=True)
+class Token(object):
+    """
+    Used to represent a token in collected query-side matched texts and SPDX
+    identifiers.
+    """
+
+    # original text value for this token.
+    value = attr.ib()
+    # line number, one-based
+    line_num = attr.ib()
+    # absolute position for known tokens, zero-based. -1 for unknown tokens
+    pos = attr.ib(default=-1)
+    # True if text/alpha False if this is punctuation or spaces
+    is_text = attr.ib(default=False)
+    # True if part of a match
+    is_matched = attr.ib(default=False)
+    # True if this is a known token
+    is_known = attr.ib(default=False)
+
+
+def tokenize_matched_text(
+    location,
+    query_string,
+    dictionary,
+    start_line=1,
+    _cache={},
+):
+    """
+    Return a list of Token objects with pos and line number collected from the
+    file at `location` or the `query_string` string. `dictionary` is the index
+    mapping a token string to a token id.
+
+    NOTE: the _cache={} arg IS A GLOBAL mutable by design.
+    """
+    key = location, query_string, start_line
+    cached = _cache.get(key)
+    if cached:
+        return cached
+    # we only cache the last call
+    _cache.clear()
+    _cache[key] = result = list(
+        _tokenize_matched_text(
+            location=location,
+            query_string=query_string,
+            dictionary=dictionary,
+            start_line=start_line,
+        )
+    )
+    return result
+
+
+def _tokenize_matched_text(
+    location,
+    query_string,
+    dictionary,
+    start_line=1,
+    trace=TRACE_MATCHED_TEXT_DETAILS,
+):
+    """
+    Yield Token objects with pos and line number collected from the file at
+    `location` or the `query_string` string. `dictionary` is the index mapping
+    of tokens to token ids.
+    """
+    pos = 0
+    qls = query.query_lines(
+        location=location,
+        query_string=query_string,
+        strip=False,
+        start_line=start_line,
+    )
+    for line_num, line in qls:
+        if trace:
+            logger_debug(
+                "  _tokenize_matched_text:", "line_num:", line_num, "line:", line
+            )
+
+        for is_text, token_str in matched_query_text_tokenizer(line):
+            if trace:
+                logger_debug("     is_text:", is_text, "token_str:", repr(token_str))
+
+            # Determine if a token is known in the license index or not. This
+            # is essential as we need to realign the query-time tokenization
+            # with the full text to report proper matches.
+            if is_text and token_str and token_str.strip():
+
+                # we retokenize using the query tokenizer:
+                # 1. to look up is_known tokens in the index dictionary
+
+                # 2. to ensure the number of tokens is the same in both
+                # tokenizers (though, of course, the case will differ as the
+                # regular query tokenizer ignores case and punctuations).
+
+                # NOTE: we have a rare Unicode bug/issue because of some Unicode
+                # codepoint such as some Turkish characters that decompose to
+                # char + punct when casefolded. This should be fixed in Unicode
+                # release 14 and up and likely implemented in Python 3.10 and up
+                # See https://github.com/nexB/scancode-toolkit/issues/1872
+                # See also: https://bugs.python.org/issue34723#msg359514
+                qtokenized = list(index_tokenizer(token_str))
+                if not qtokenized:
+
+                    yield Token(
+                        value=token_str,
+                        line_num=line_num,
+                        is_text=is_text,
+                        is_known=False,
+                        pos=-1,
+                    )
+
+                elif len(qtokenized) == 1:
+                    is_known = qtokenized[0] in dictionary
+                    if is_known:
+                        p = pos
+                        pos += 1
+                    else:
+                        p = -1
+
+                    yield Token(
+                        value=token_str,
+                        line_num=line_num,
+                        is_text=is_text,
+                        is_known=is_known,
+                        pos=p,
+                    )
+                else:
+                    # we have two or more tokens from the original query mapped
+                    # to a single matched text tokenizer token.
+                    for qtoken in qtokenized:
+                        is_known = qtoken in dictionary
+                        if is_known:
+                            p = pos
+                            pos += 1
+                        else:
+                            p = -1
+
+                        yield Token(
+                            value=qtoken,
+                            line_num=line_num,
+                            is_text=is_text,
+                            is_known=is_known,
+                            pos=p,
+                        )
+            else:
+
+                yield Token(
+                    value=token_str,
+                    line_num=line_num,
+                    is_text=False,
+                    is_known=False,
+                    pos=-1,
+                )
+
+
+def reportable_tokens(
+    tokens,
+    match_qspan,
+    start_line,
+    end_line,
+    whole_lines=False,
+    trace=TRACE_MATCHED_TEXT_DETAILS,
+):
+    """
+    Yield Tokens from a ``tokens`` iterable of Token objects (built from a query-
+    side scanned file or string) that are inside a ``match_qspan`` matched Span
+    starting at `start_line` and ending at ``end_line``. If whole_lines is True,
+    also yield unmatched Tokens that are before and after the match and on the
+    first and last line of a match (unless the lines are very long text lines or
+    the match is from binary content.)
+
+    As a side effect, known matched tokens are tagged as "is_matched=True" if
+    they are matched.
+
+    If ``whole_lines`` is True, any token within matched lines range is
+    included. Otherwise, a token is included if its position is within the
+    matched ``match_qspan`` or it is a punctuation token immediately after the
+    matched ``match_qspan`` even though not matched.
+    """
+    start = match_qspan.start
+    end = match_qspan.end
+
+    started = False
+    finished = False
+
+    end_pos = 0
+    last_pos = 0
+    for real_pos, tok in enumerate(tokens):
+        if trace:
+            logger_debug("reportable_tokens: processing", real_pos, tok)
+
+        # ignore tokens outside the matched lines range
+        if tok.line_num < start_line:
+            if trace:
+                logger_debug(
+                    "  tok.line_num < start_line:", tok.line_num, "<", start_line
+                )
+
+            continue
+
+        if tok.line_num > end_line:
+            if trace:
+                logger_debug("  tok.line_num > end_line", tok.line_num, ">", end_line)
+
+            break
+
+        if trace:
+            logger_debug("reportable_tokens:", real_pos, tok)
+
+        is_included = False
+
+        # tagged known matched tokens (useful for highlighting)
+        if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
+            tok = attr.evolve(tok, is_matched=True)
+            is_included = True
+            if trace:
+                logger_debug("  tok.is_matched = True", "match_qspan:", match_qspan)
+        else:
+            if trace:
+                logger_debug(
+                    "  unmatched token: tok.is_matched = False",
+                    "match_qspan:",
+                    match_qspan,
+                    "tok.pos in match_qspan:",
+                    tok.pos in match_qspan,
+                )
+
+        if whole_lines:
+            # we only work on matched lines so no need to test further
+            # if start_line <= tok.line_num <= end_line.
+            if trace:
+                logger_debug("  whole_lines")
+
+            is_included = True
+
+        else:
+            # Are we in the match_qspan range or a punctuation right after
+            # that range?
+
+            # start
+            if not started and tok.pos == start:
+                started = True
+                if trace:
+                    logger_debug("  start")
+
+                is_included = True
+
+            # middle
+            if started and not finished:
+                if trace:
+                    logger_debug("    middle")
+
+                is_included = True
+
+            if tok.pos == end:
+                if trace:
+                    logger_debug("  at end")
+
+                finished = True
+                started = False
+                end_pos = real_pos
+
+            # one punctuation token after a match
+            if finished and not started and end_pos and last_pos == end_pos:
+                end_pos = 0
+                if not tok.is_text:
+                    # strip the trailing spaces of the last token
+                    if tok.value.strip():
+                        if trace:
+                            logger_debug("  end yield")
+
+                        is_included = True
+
+        last_pos = real_pos
+        if is_included:
+            yield tok
+
+
+def get_full_matched_text(
+    match,
+    location=None,
+    query_string=None,
+    idx=None,
+    whole_lines=False,
+    highlight=True,
+    highlight_matched="{}",
+    highlight_not_matched="[{}]",
+    only_matched=False,
+    stopwords=STOPWORDS,
+    _usecache=True,
+    trace=TRACE_MATCHED_TEXT,
+):
+    """
+    Yield strings corresponding to the full matched query text given a ``match``
+    LicenseMatch detected with an `idx` LicenseIndex in a query file at
+    ``location`` or a ``query_string``.
+
+    See get_full_qspan_matched_text() for other arguments documentation
+    """
+    if trace:
+        logger_debug("get_full_matched_text:  match:", match)
+
+    return get_full_qspan_matched_text(
+        match_qspan=match.qspan,
+        match_query_start_line=match.query.start_line,
+        match_start_line=match.start_line,
+        match_end_line=match.end_line,
+        location=location,
+        query_string=query_string,
+        idx=idx,
+        whole_lines=whole_lines,
+        highlight=highlight,
+        highlight_matched=highlight_matched,
+        highlight_not_matched=highlight_not_matched,
+        only_matched=only_matched,
+        stopwords=stopwords,
+        _usecache=_usecache,
+        trace=trace,
+    )
+
+
+def get_full_qspan_matched_text(
+    match_qspan,
+    match_query_start_line,
+    match_start_line,
+    match_end_line,
+    location=None,
+    query_string=None,
+    idx=None,
+    whole_lines=False,
+    highlight=True,
+    highlight_matched="{}",
+    highlight_not_matched="[{}]",
+    only_matched=False,
+    stopwords=STOPWORDS,
+    _usecache=True,
+    trace=TRACE_MATCHED_TEXT,
+):
+    """
+    Yield strings corresponding to words of the matched query text given a
+    ``match_qspan`` LicenseMatch qspan Span detected with an `idx` LicenseIndex
+    in a query file at ``location`` or a ``query_string``.
+
+    - ``match_query_start_line`` is the match query.start_line
+    - ``match_start_line`` is the match start_line
+    - ``match_end_line`` is the match end_line
+
+    The returned strings contain the full text, including punctuation and
+    spaces that are not participating in the match proper.
+
+    If ``whole_lines`` is True, the unmatched part at the start of the first
+    matched line and the unmatched part at the end of the last matched lines are
+    also included in the returned text (unless the line is very long).
+
+    If ``highlight`` is True, each token is formatted for "highlighting" and
+    emphasis with the ``highlight_matched`` format string for matched tokens or to
+    the ``highlight_not_matched`` for tokens not matched. The default is to
+    enclose an unmatched token sequence in [] square brackets. Punctuation is
+    not highlighted.
+
+    If ``only_matched`` is True, only matched tokens are returned and
+    ``whole_lines`` and ``highlight`` are ignored. Unmatched words are replaced
+    by a "dot".
+
+    If ``_usecache`` is True, the tokenized text is cached for efficiency.
+    """
+    if trace:
+        logger_debug("get_full_qspan_matched_text:  match_qspan:", match_qspan)
+        logger_debug("get_full_qspan_matched_text:  location:", location)
+        logger_debug("get_full_qspan_matched_text:  query_string :", query_string)
+
+    assert location or query_string
+    assert idx
+
+    if only_matched:
+        # use highlighting to skip the reporting of unmatched entirely
+        whole_lines = False
+        highlight = True
+        highlight_matched = "{}"
+        highlight_not_matched = "."
+        highlight = True
+
+    # Create and process a stream of Tokens
+    if not _usecache:
+        # for testing only, reset cache on each call
+        tokens = tokenize_matched_text(
+            location=location,
+            query_string=query_string,
+            dictionary=idx.dictionary,
+            start_line=match_query_start_line,
+            _cache={},
+        )
+    else:
+        tokens = tokenize_matched_text(
+            location=location,
+            query_string=query_string,
+            dictionary=idx.dictionary,
+            start_line=match_query_start_line,
+        )
+
+    if trace:
+        tokens = list(tokens)
+        print()
+        logger_debug("get_full_qspan_matched_text:  tokens:")
+        for t in tokens:
+            print("    ", t)
+        print()
+
+    tokens = reportable_tokens(
+        tokens=tokens,
+        match_qspan=match_qspan,
+        start_line=match_start_line,
+        end_line=match_end_line,
+        whole_lines=whole_lines,
+    )
+
+    if trace:
+        tokens = list(tokens)
+        logger_debug("get_full_qspan_matched_text:  reportable_tokens:")
+        for t in tokens:
+            print(t)
+        print()
+
+    # Finally yield strings with eventual highlightings
+    for token in tokens:
+        val = token.value
+        if not highlight:
+            yield val
+        else:
+            if token.is_text and val.lower() not in stopwords:
+                if token.is_matched:
+                    yield highlight_matched.format(val)
+                else:
+                    yield highlight_not_matched.format(val)
+            else:
+                # we do not highlight punctuation and stopwords.
+                yield val
diff --git a/scantext/tests/data/matched_text/binary_text/gosu b/scantext/tests/data/matched_text/binary_text/gosu
new file mode 100644
index 0000000000000000000000000000000000000000..61b925fe68c8f0f791a440e370e9b61dfc0684ae
GIT binary patch
literal 1712
zcmbVM%WD%s7@t108fhC8@j6gwRcJHsnJrRsX)Rc=BG!WlO_OaBOtYoC@j)rgv3S*E
z4<35;;HmB1gU2GG$Mz!hk5F)CXJ*|5q2R#I?|c2e@0-WY?)=ii;J^UzV-U;%qn+%|
zr>3%VeEAy!%0Ph;J&%DQa9R6@_2=WtPV%_tcpgE~CNv!L?T=f1f4rOu9&6SNJepWD
zOu9>{<fpO|ES`?q0v>%m!%K4%T?55s%X8ShcO%t3=6Pm~V0psEFIRi8yJ8o(7Ytdu
zOaJI+QcHQ(df|M<MRacthv4po;aPy^WrqJ0%qq;6^;|65Om01=zuiGutKS}{0q{^u
zhCk>n+5XxDbMHsICq>w!0npXD+rb`B`QTQnzwNQwT6xn`t>o*U9T*D$XO3UHo~qdd
zGsy}S4SD|Jq2(_g+WH@iGdKG8{e|}B`R7NLe|}{7Ich5Qr30NlJ~*~~aBTVAch`DS
zz#a{NDaX(DFuSJ{X=H!AJJu|VIUUoJ=R?Pb_TE#(5Jv(@q)>nn#+YD=8Rl4Ei4_hA
zBA5_D2_u{cB8egaMHEv)DP@#XK_yi*V2EKx7-fucCYWT31srkA38$QK&IOlT@jxJf
z1rby*!G#b~C=o~`u_Tg8Cb<++N+kn@6jnr0#S~XUNu^W}=oSNAKG3NHO$)TC@FcF(
z!&$h!4QC#Qt#!Dw13PiJH63q;RalKSHY)8Jd{)`2$IlB8!m!>On=~u!s14UDal^RH
z$fs&YYf<gZ>{b*dX$+jM!l_q{sIt+7lasJnoB9_EBZOwL_7)2HWG}<5xEZx=11nb1
z{r1GPn$@r!>jm9kUYe%x_Ijt)f=T!~ft3U{l4LWUEtTrcq|sTM(W@)j{!8^J?%XK=
zu=>^fumEfxFIfEbAjOkUODV!{RzLe;vH0ETAFaRW;x-+gtXjLj*8Yk1&w(zP4u8Ey
Woj&H!KRf=B@X!5K=P!S@{enMJslW9A

literal 0
HcmV?d00001

diff --git a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
new file mode 100644
index 000000000..3c0984a8e
--- /dev/null
+++ b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
@@ -0,0 +1 @@
+License: GPL-3
diff --git a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
new file mode 100644
index 000000000..8f2188c97
--- /dev/null
+++ b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
@@ -0,0 +1,2 @@
+license_expression: gpl-3.0
+is_license_tag: yes
diff --git a/scantext/tests/data/matched_text/ffmpeg/ffmpeg b/scantext/tests/data/matched_text/ffmpeg/ffmpeg
new file mode 100644
index 0000000000000000000000000000000000000000..c06345a809dba9dab8c95e37505411701fdd65c7
GIT binary patch
literal 6136
zcmeHL-EQN!71m6D(E1g+7sM9sWU!HJXR<+d23d@g*fRza+pwM44vHe6B+6z-5>-)h
z?7^b@4t<J6ulojlkv>8%_p;w1rP#`5+7}oFvBbl}!^1=Jd>{G4;A;5v$rGo3o;u$;
zRQmTnUK^49b)190|H72%=e2X{{1(4oIKOj#jq<OIjB2Rvb>sb0Wx9U<#?(3vS^Fja
zPoG-B>C@Ndqkey3#puI6wTJYk{&vsn>eIge`Of-Tza58}dTK|${utQ$AA`Wi>-VK=
ziq20R`>xwL49|s15|uKsDMYHJO7%<D)?UlGE;k~_C;L-d|M_Lh=w8c2uwt8uN{8BC
ziinqS!PYVqiUleZ!6UW23}v%+tXL%D0)3<$R&^!<8A%bM>znjLs_xRd{#jc;gLG)M
z+uHgvO0%-im1FSZ_wA7SWe08j+-i__sY(Sq(5lP>QQ50%CuPwzQ0vO{!%nl~MNyK8
zr868}PX}*qFZ+xB<!I)-om>x`PO0;bUh!OXDy_C|>rPv{h+q_DxzM6;D%noZ&vU+I
zg;LCyOAPEnm2rsP3&!JE1<Ws_NaUypZD<jXTm)*F%8w#sDl0HcjDpf~=2<L@LZ(Yr
ztOSCeBs>jcnF_R?2_90-UIXTNp2HGe1S?qL+RpT+%)>^^*O7FVvGRFbt(|^ZC^r;^
z2nyJ+k<~0xv{o=1&IM0tD6!N<UF)$iBr?fjk%$z{V7aM>nCnNi9LIGL$4G9@DB8?*
zu~2jFu0>9Z^{iX^Wm=S7uZJqXlyT^4gb(Vjof3!<={Q(%nVw;)LXpC4*Nvqg%G?<3
z;JdRqJ-IzOhBz%Z?lLW*BGcer1>bQQipSJ~;4BEB9&9#e%dft|mq@uE3wN2tyMkFJ
z)Nv@asnta;vy3KaC&S*s{>jhakjg~*7c9KTYjv>7m7wXx{<-gvud(7dg?V2l^)@Y5
zI|&9?#Clhbd8jkKSC&54`}NZI<C2!_eMj)m*GGOs5b&UB1i@1cjR<RX&odZ_R9XZo
zjU)}TiW-S?At=i{5K3Q$vLRa{aqKr->ZDOlRG7tO(|D4-h$-Rhx7!Q$&I^^5%_wAL
ztefrfBFP$t9-kKsNw2Ueo2n_#vSv)R+U(6HwaSFpG+XJtnS=(my~>+$FYeUO2Dnza
zFPo!TXHCm@e4X*&L!-Al;Wx*(IXd~ep|L^QZwNlF{k=J&!1BFaK;`9LdDH7HbKYD?
zVbFW=&2hu16v0XF^z=nT_%ZqGu8bukyUpOi*QOQfJXqHdIOH-$CMXokc#e4;4rhbu
zWVT=;UCUghl&RR7=aP0SAq#RAJ-`j7B9@3m<y+)xq#WABxyV#r5O4?sXqC3(?E0bp
zYrQYq`b%pRkfjTrQ!+(bZ+DMFx7|ImKa3ZU9EC6Me_Au!&s~nZ4|8yzOrbZ*zU&<m
zyyvkq_KwiF#rk!*Px#wn^SV4Q{X!P8XfbbCH5umE7H%xoB4!B?T^6g{LI{++GNbXq
zNo3MAg-Xn~iU1gyoG{-262ls^&<($aLjul2gs1?xA%F%XBv39FjIR-XimX@Zu|zIc
zP!x7fe1S*-&oVPfckqSrFq8y+SWiS&a{9my$T3BN2+IRM2!!@v)8AGdk*0lS@P7{=
z;UK*B6}mDkh5<?u2x37PtH|%$T}s$3;w>vRHP?4K;F$Fg<j69p4$u%FTOqHesE{|h
zj^QCgiso2zCH@JxUCn@>ZYDRkQF7m|?r@Z@d5moqtEHVCnsJX!mDUmwWJah>Skp4`
zfsC*QD9)7oZ4~pRW`AG@-OX_q5Br{VH>dRV+yV8=wz&!WLGYZlvF5NJ;EtU+7Sd9{
zqRq99*Ag)IwRh}w*};uLU0ZTbEC%=f);saK$6oK~IXmk1PP!-EZ%o6swmBXcLi6_$
zyY@4rTN1Db_>V)!XGs4H=|K6<kPc*${wE;4N|bH=U4J$njo-YiaAAu?PUNymJb)N&
z&OZ10Fxhr{J)DD%La+V8{Ikw}iBtg$$lyduaiEEcJK)zV2JmA6Cp-tC2bBlP2}a<S
zh8^T`Fi#n6TM(2f7`;iWz^nwRIWmPe;~Vw{XOSEzHZA>F2JA`(01#lKW*JE|9n$=5
z%cNxLeO~p>h6+|qGJVAaun`?p2O@S<cTib71QxUo07h^)uMp`uyxaoR=<r>&^I9!C
z%-v{0Cc!TN2P<FzK41gGxs!3sfX;xfsEiL=gzn#s7H=mv3)UaMXPAOnf4q4A3Yx_V
z2+M#X)(#y2almH?V9wKG3r}0ugW1Jf==aY@SEI#yg2CZvF&@n4Y&e;*KAZMui_yi+
zRe#2&H?!$vKJXZuBN*_k^*@402U0LJz_KVnH9!zr?=fW>F5$4f;%h@Ngj~ZRj&%Ym
z{(ont#bX7)H*BP>UojaGt{*ZYV{k~BXbtygs<oS`LpDkS?~r}nV_;h82b?t7yr7fL
zAsfmFtYa7}l^?Qmr3;lFvg<zU9`$-%xA&skV>k1D%?Ch5*(H&@O96UfC`ipLz|;n)
zz}?LQvFp-31K@^O;$_VOFX_DbK+_vkd~lFHQt1)HtofzoNyH9`XF_UN=1xj%6VjS%
z?kc>IsTid=%41^@?^G!WmEm1v&-1RR<rOFnNfWwCq4U12^X64u3~964wT}taypD=x
z<yo!@6{r{yFn33ESz#V~u|$Dk3**-UOpY#5YQ}~NZ*E3{ZAvTeo)^Q_3;05JXv7vP
zh6nvVq&6Co?x{N12za659V!et$L?xWj8Sp9#a$ROfu(VEE-^=8EY1fFzDI+(z1YH9
z2NkWat?F@2<pw<6a#0#Ig_YXehsA_}RT_q!BVSetq2)~;s!z-qKBexxHP+waiiI*H
z<3S)|q&GS<SRYz2I3*9JpvySD?907#$j@bgR4RB<HyB>^-^|&#e?6ePy!siR-(F0v
zr<3tuyqL4|(PDl(9n5a0qdyL=T4w%j1lmFU$B^Yubk)8c)`sYf%wNZAl!g+=*%BGK
zI^-Z~4?4db%r9o6>0&e)*Ug;PyujpR+;y}2v(e968^`CPMtgFjzD3p5xy){b>LRsT
zr=+KwE3jU1f(AkL^n}$aItT?$oc0dTW?*|2NcW@kELH?-_EvQT(5Y6hSqMeF1+dsv
zxm=ml_RL<6JgZ=&K1tQ*lWOkjo7}wi{^9**b#$f-T{WrvstjIB>~-W>o-fPfUv}{S
W`N6#H{q~dk!ra!GJN@d@y#5O*7h!Dx

literal 0
HcmV?d00001

diff --git a/scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe b/scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe
new file mode 100644
index 0000000000000000000000000000000000000000..5a9b37bdb1d6e965201cbd5d028ae82b9333203f
GIT binary patch
literal 16136
zcmeI3OK%)kcE>Me1BrX$L4bG>WRT32koJ%cyGcp5<4_jIG%3mi^dLyemZK4*>Z;q_
zWz|dd=xz$eAd9R51n?psAxN^w#=syLWHn!4fP8~EK$da7K#(ZEb8l5YD2-^(x|{5-
zs{4Na&;Oiz>h@oLrCw7?z0UXfbESUCRX!it-&g<q712NZ@xT61{o;*ZU;O3DtzTa}
z8HR<<@^p}UvG%<rNlV=`I<FEPCVKDgv5wQgbl-fl^`~{whc}eEwep7g>!)8|XlnaL
zZM^n_l?y*o|M!n8>e7!csK5C;_03zv{yty*Pb;={a!(FvHv#s_MX7(I90LFLFaG$#
zYw9DSw96-%B@dta;vOD<ec=yx-R*Dl`}(iQ|B{UV`Nh{4zIx*&5p__vG?SA1f4=z-
zE4D$OaFx!?eIBc?9(VK9ru?H{(3Que`4_IYxXyjFI(*y>ywW2q`1{w@PF1Fzz?8->
zO`wY~c~a<pn(KD4)gsBRo0ljoN*CGHcbKOz&LR_=ggo8WOA@tIk^38d98_f(6<uG2
z`ucU9^d9+4sb8p_qA+<GrU~`vz86N+?Uhn#J7_80>8R|<pyQ`WKOFpA{ak6STJ6F;
zSGgxSX<oaKKWV1wbWGy)BGVaU5!vg9k;w~^UY&&mlaY62_85h|xIdXaCnJ;ivqx_f
z7N<!$oQ3qBk!AMSPm|KMYA!4{;buO29wb#66|?7%45Qg2<$1+iG|#<=KAtWl=@nPz
zswIu@`QAc`er`;8nwf>bL44-9h!^6+s5jSLie<}XQ8idd6lYf>?@U2y#z3XH>3DIT
zEVgVuYW!l<@XGt|E)*~6zsSsDooQArj!9m|*+RA=@v>}jkcxCNH}3SN$c&krT|&yC
zN%O^lE63>z-5Z5LQF@DwAEnaSac^OTP~<4hdm!0#(MeFYSnJpuWuE_JA<fwI7CSUK
zJ1_*iw~%GxjTY;f(9VSv&%$_q9G}H=U1j4viP+z>46>G0J5asQulTBd<P8d?+F7Sv
zcB6Elw$yd@*1p@HWvVkTFO1&bKe};v_vl2MWEAFUA~>K&ULMK@++q{ARG%HKA~Q0P
zI!H!dL^E@frFp3(M%$Q{x|cjj(s81nm{a|ecJULz{ibl)-KxF$ZqVMmV!xf{My~2^
zRqC{_V-u(OsqR%_6okoOp^}6(+fw8C_RCUh(p+j6mt13kASOE({1ZT;PFt7#v8Y6a
zTT-k2X&t$ajPtN8DUM;0qCmm4iUKWC%!{Jbr&0aB$$`fk?34f`QhTPp*;ie4tnRCi
z)!t%HES3rM>QB4!(bx5-VPMjh-f5n9svzX~xOqOVdSzHfrlq_4b(Vc3s_EE4)^QaP
zJ<N?K14N>(<fPo=DD;1GqV9d0XjkA`v%1YBGkGX8?L|^ZxBIazq{?Jg3mxR%_|Y)U
z!)K3rUJ|$@-94#ink38-G7NvhV8~>}rW~e$zT9F($_L<Y`+PfBHVamf(yLZlaJ!~t
zU~bR`E2WYu?wPz6Q;ajKDy@>h&8AW(L&I{6D$}GGds3br4?}+_osl^jGbk*#c3*e+
zvEx21KTJw6%1Oal@f5*aHf<oy%Tolkd{PKmGhO|%#%O)m4}B}W>b$PWop$H)pBLIz
zU@LJ8rdcnF%=|xS7Y`A7n$Jy>nB0qCO_|pVqp%E3v89zAe;IzQg8Z^pC0xqq-b6?f
zRm7}IE>*8D>!Z+$N!7|9iU8A!z3s*R&`SnJt7bP-hFv-&PSV8K%1@fbbhdh1td{Bv
zSzF3JD|hcfGi>g9Ik4AkG_stgGwHzwKt$J?^+JLyoR~-kD)vg7h9$9(pzwr-=%$R9
z#Z7aweik!JR};!-Y|=`~7lCHCDNEkn+)1@tfdVDX{8`4sW|xiOK&#LW#z2Do{TNBM
zZWm3G%B;yM3W0>&Xm13N3PyJ8bUF?P!%`=P&XQTkFIC5xr9-knwHE|TZkjOHP(YK*
z4WOL^wx$4D&a^`owiGc)!e^A(JFP1s<z=<WLg;(I0k}JtD9tPnlde$S?XZ9Y4ruS7
zvbZkoUr4f(3pXiK+8Yj}33KwHFd_(K=h=}tlegKQhe#1yhaTi<l?jf!8d$@2sZ;7*
z*D4-Y+5U6t0YX0t-FVrwV25S`?Eshgsg%o5N~%+qA`&GNN#RP9h(t5n5@rPvog1-K
zwGc(Gt8b?xqrruu$<Z_PL4v>%5nRk<&qQf5D5ym`zate%rF9iegWc>kgV?rnbzJ$r
zDT;m-(KaheA-yk*0i_0}R}EC(U6eYa#fw$QVo6yZZgt9zi>nEU$XYhJ<=$f<;6nwA
z8OzU59v{&cwvs$Iw`$LVGVQ7CloaE__vmLJP%qTE*l?Me8EW9OaHwvAb<Gy^U<DOl
z8K(||2Mc5KKI+HK&<k?rIpM63#lFzpV`kKaN`5v#OpjnHhPWdqx0>O#%L?J6G#&AW
ztOE<}G85;oT}ic=&q{&WT9?vEG@quPR$H%KiHj3k=U6Ov=hJ%i+FUQs;6_6h!Q9Ng
zBK`Mh(<WW`O*`BU)JYBw&8kH(8rHZKp+bOOoCMk9VG<TH-o+~>3yNo0Uk&4fK{^g1
z{HUuBQ=kR(bL_9MJJv~ZSusd9N28urp(&9@i4aos)JPdwIcn_X35`}^(oYrpwU-tK
z3Fjebnu=&=mUgx+jn!}K)y>Jfn|$<_dUNuD+^wmRay(QuJQD=9>kU{<42}316y#?U
z*xp19VP3*!khRl3#opwRqj$DbcHMQ@^n$!|O0gS9aOCc>5Lt(OwKmGL#T#~)zS_Og
zy&`K@@21&lZpFc>zlJ#Mw2=vH2PaW<AY2;y3X2^0zJ9O!Zg*3!-rU_?qd1|zj?sXh
zQs1JYJR|%|<#=98TPnv(<+xK@iePguQI2IVeYSIS=ituGt(te8BA~=7RkM639K0G)
z)KcC^j9uh_&0j7SlQ{tJMpj^xvX;c&h<z<onGm)xI8zH?uDNma7Dy{f%%nagL;YOg
z?JGgIebs*ThMs#8KmuPt9<c}MBJGy|r@3Z15=K_&VSp_!57C+|ok7%YAeXWGg#FVN
z{02n|CLz#6dt#uuZy#>nxi?ii4yzsnU*DqT2~r;soJok4553ctg)OQ5xUOCAi+X1D
zo@?5`YpI|HLw&WWpe}74S<qU=76_?pnCBX_?G$oDgPMHZ7WP2t=Bzh?m=C1xAvnnm
zWzd~*h|&YIhMqvY)?1=2es*y3$=!P=dgspPn&~~-xpVUQHOmEHS{kHv<p`b&)nov3
zFDXxHY3uflquozPzw`0It%H-#h3f1doZPu_e608H9_bx@xN~%JuzT;;&XGR6cXW97
z_(m6<hQXj^tyc^pD}qTt4r7Z&JVO{;pF?Ylp|vUuJ%}l!-Gn3IJS;9uU+2zD%M+^V
zNDdTK>w~@ss&!bH(H{=WGTYkN7>~zYOqAU;A8d%)#*7vl+YklhrMiLOM`F9>ijKtE
zYUw@E{Pzr$Q%J;=38M4gAma85!A`TsQj2PT`90BPTd20O?kVv=-4gj{^(nD??1^o`
zRlsI>p@KTnnpDn4cEx5w3#`S2A!1L99b%^|6{!FEgKt;V?<)0G|0DI^a?j^~>M&h=
zr2d!t-+AlXl{eq|*4n*L{%Z1PwZFu8xz%cfT!|7VYKA58Hq>qK<C@)CB1im87n*7`
zHcHrn<y($FG$6DT&r9*_PB8%brS1BB+qF^*R3keP&=$S6^v%OtjhqnqB6_jN)}OND
zZzKLZZO=&>$&GX<$EL=7yFHyOZVUyP7r@7z9YR!X5i#@0oQYvBm27IUcXdLrlv6H1
zvM$kPo3Cy@yeWGwtflUN6k@h>l}M7>d?5xUSOFRV%Db5LkqfKsU=85*!`!bT;HoYO
zOBa%{Ipb?7)aMOBzZXBA=c~g!MO<2~wZMvWy)7y}hZm+SxStjSir0zc7>F2m<mcwY
z+8H*Qv%hz;C*>n&4L|4Lujp)ZMhHDIUR-Q%DmvJ+C~re#7P8Eq5Yluo6a%ic_DC{k
z>~&d5*s^&UlUIb)2ahc!M^frwkYK*1Kn57GpCc&~MmUT;4jAfE>@h)!ZKO6Cf0sO%
zyQeu$#2^a~q82x#BEw~rrEAEGb`;TaE4dzFtuBq6yJEVljj(D8k4s509ST<5QjuFK
zat()ixgrN%%pudclHwvJ($4moeW>4lTe}EHhi5x<76THMsh>u#jO#_fsomMLlhvR?
z*8m!fwy_)2-QQCC7EP^%2}GJk)MQlcl%;1ap;_pc6;gpjQ_#~xl(Votffb-x*I|uw
z;5B{B&C|+Y4|Wjx;dQ+!Qeh9lk^!DTdtPO7Fj~XaYF+Ym0Bfx^wTjiDA*$oVf-hyv
z`UaZIfv9xORpVrVJsO#|bXM5Cll{&IqRTOraCjiRKH<dX&LPM(OrfX1J>uKiPCG<D
zLd0>d=+}(Br7ogsmy^ymr*!tvq_gene1s_KY>WGg{_;x>WL}Yo49J+osLGvEJJ%ID
zwrh`mHYe>#V+^nLPBHAWw2BfLj1}=gJ9sFAIYp`^XitY?3EE3~|DEZ*#o==?dtEm>
zRpG-@rWy1%hdpsnR;F?NAi)k)Y@}W-#vB!zU`^=a0FzsRfu+gv@`TQsk8^^!C9Df^
z%ME@Wpbr#O4%XY^xd1!f;M4(UlKWzzS0w9YK5c9l_GhDA@NaL44<>C6zoGA3*OY#6
zjP<CEyGfw;n!AU#E;%G$*H59_oDXof8*vH}=v~hqa2A9RP(<`YYW(CG74XkzYDDDz
z)3B&;$$91+5wM~8RZja$6p1w9XKK-L1Yh(W&nJxlIA@YT`IAPq5H&$;X2%D&_m;K{
z+-X9bE0%tEOFz8t?}uj@YO_txMZ3n8V_0uUHS8&LV|~_t1Lf94nIMqFisjW^x3-#l
zXXLC8D6m>QRUW*@ALePSPVo+st4~N&#Y8CUy?{A<fParIi}2epqU-JZ>)NKSpWr}&
zwnw+ZKy}kIph@VdsKD;LSN-bw^XLD*11{YZpc21fcG2woV`FxUA2DFl+0G&_!MuS-
zahNnu=U5hOzFt-|nYeXj`9p|Zg)>)OTJtPznbMUdbDIvulDRFJ+jnGcvyFQ$oTYxG
z(xT5dS0VQy_aXPCT=8Q9k5t8Xrc{cnnS(BI60%AxM97&*YrjCJrbDDePpg$+OHyX(
zfd%meJ1@f&l&p8uMq}6%g?iufODxRWD9*2bsbNz&bL^F4vG+;(I865KsqZwzrI<b*
zkB`M0E8mh++sYTr+`t-)1~0cRv9v+D*xBZ{rB>f@!G~$F_8@$yw?(5r8~CB7L!YS^
z!v9Qvp<SG4#}{2m4?_J;U6*YbiA1fPwe6Y7IqAYPmtg?tWr%1V=7lkSFSrer`?ix)
zYwHqMmn2an{E{U8mL#!p(me0Ht9`k9702!`@n}?LIMmKBt_H%NUWA{aVe;Sh(Iq=_
zwm*Hz!M^E0xV&n?PbJUp#|BpFghLR~kDbbEU9j;%<s~sJbG4(p0gkLCu}%XBTlG?m
zfn%`R9x+`Nx4ezZo12&AobLVE)BYizb4uN_Ha_fd9nN0xA<1jMaaW`D8n=*s_Do)m
zT%>*YI`9IQH`&s|;Z_`P6~%+CYu$%^@%Wcl`x*zlDZg1#z3EUasos+6eMhS2eBaKc
zc{Pu6si!pR;wATnm~pR^+5^!`?9JIn-gT81kDIT=$UX1$E_}tQN5Yz`eZ{!(ffE)e
zZyt#URUfU^9^_k(EZha|kHS6YGNf05&F&@#>G)sau-^EvO&!g^DLGZ)xOPBUVj2^j
zmdB9ypbGL}2o&hy6hPCK-{boYDz03ATtELu{ro39|A4Pt`K+w0*rvOu@BMp#<!&(G
zG{^R=p9;>x#I`i=MbM%+v-VAxqgeDHceH(gXTW+3-CR7gPjNtluDMh2#vUq^&bg$`
ztbv`;$O;pD)ZP)FGAYk$`g8AT;AWktkXCs^0N@rM!=ad36CA%wy!PbXIBbqBjb(sS
z8fzn%wlPiCt-~TGJ>-%>minI1K=;01aaxon?vnY0u6cEEhgXS8IbxDG%EjBw<;C+a
z(6>!2u^+roo>g&qqa+v5rPei4@3>*JXZ4)Q%j<o@vT-3~-B@SJRs2fC!9xE(suGPP

literal 0
HcmV?d00001

diff --git a/scantext/tests/data/matched_text/ffmpeg/libavsample.lib b/scantext/tests/data/matched_text/ffmpeg/libavsample.lib
new file mode 100644
index 0000000000000000000000000000000000000000..27ff56d4f012ac718c6b649f40015f924af2db2f
GIT binary patch
literal 1783
zcmb_c&2H2%5caA_pcN0{9#%cM+1+lpw4z8A3$#)T2nyHYb(~$Rj%{owoA$z`FT)G)
z0^Cb4TzLad@BmCU+r}vfs36iL^YhK;u|1RJ`_!`OgLZ_QPj7v5bFkT8>-L-e?!xSC
zbl0lTn>IqNj|iP%gwDVG7irzTK&{F5f0K6>wJtvVR_-E%KDQ8BLPvKHnt)tB??G8Y
zx8PlAW$5c1xx9d!FTPDog)VtN&Cw<2*&len&5;kcqC>5vi*ja;C($?)GWM2p7bQt*
z_#|?cMFZOF(P2V1*ZTdzaAW;mca=#_RV2mO%`A5`HIhfQy2{XwbKH8N74A5kP$7AY
zsRv`L6Sol|#_~iI+YxK5ZzwyaldIK_is69Fgo^dd5$CBFjHEHidxJhE#6#h1N`=}^
zUCzM46&*`Xs7q_(q_Se+=CN1@z?hG-Ne#7`@*?GgnJng>GY_H5yy`R>4kPJF(_!_P
z*K{;zI_AN0G>>^MSOb!1o5HcwwdaYDp4))fXy=U$>#h&FPINQJs4TfJOQyMSC(2I)
z*Z34}EU*RBDi3UkCh;+$w5<A)N1moMG{rbIYm%nnS}co0jLNwRDHQBHv{{ymIN;O<
z@ba3&xzGcpX_L7C<6>EM!Bv^ii;5Z(-bvY?x(E-m^cc>IW}=^RZNm)!EZ24{%Jmjq
zRl3Zzycx~Qj%|_^{o&vz&w5R?xC35zO9@LPow)54GTPmF{pje`)4i96qodvJ?g}Kp
zL=w26L`+BpBa0y@$-;DUFsfZeu>c>8P`TZ!0mTxUPdUDY_n$o9$2qsIoX`Gh4{M7h
zg|<VQNu*r@Vvzt{fG+0x6k=19fjq9J*=y{$$J&%r-bOF@+l)I8V=k1*Jg$BUc+ub;
za65?PR0^2?8mu#a19XGhLdEzmiGPLNo7V|{5T&?;{hvTBxLb6jEP`Uov&odpjL{Z$
Ox7tR}s*-P>*!}@@M?K8|

literal 0
HcmV?d00001

diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
new file mode 100644
index 000000000..c0e32dd8e
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
@@ -0,0 +1 @@
+GPLv2
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
new file mode 100644
index 000000000..d78d0c44d
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
@@ -0,0 +1,3 @@
+license_expression: gpl-2.0
+is_license_reference: yes
+relevance: 80
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
new file mode 100644
index 000000000..995ec316a
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
@@ -0,0 +1,3 @@
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
new file mode 100644
index 000000000..41746474c
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
@@ -0,0 +1,6 @@
+license_expression: gpl-2.0 OR apache-2.0
+is_license_notice: yes
+referenced_filenames:
+    - COPYING
+    - LICENSE.Apache
+notes: seen in RocksDB
diff --git a/scantext/tests/data/matched_text/index/rules/mit_101.RULE b/scantext/tests/data/matched_text/index/rules/mit_101.RULE
new file mode 100644
index 000000000..722e438fd
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/mit_101.RULE
@@ -0,0 +1,2 @@
+This source code is licensed under the MIT license found in the
+LICENSE file in the root directory of this source tree.
diff --git a/scantext/tests/data/matched_text/index/rules/mit_101.yml b/scantext/tests/data/matched_text/index/rules/mit_101.yml
new file mode 100644
index 000000000..ca1a71366
--- /dev/null
+++ b/scantext/tests/data/matched_text/index/rules/mit_101.yml
@@ -0,0 +1,5 @@
+license_expression: mit
+is_license_notice: yes
+relevance: 100
+referenced_filenames:
+    - LICENSE
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/query.txt b/scantext/tests/data/matched_text/query.txt
new file mode 100644
index 000000000..d5dc1521d
--- /dev/null
+++ b/scantext/tests/data/matched_text/query.txt
@@ -0,0 +1,4 @@
+#  This source code is licensed under both the Apache 2.0 license (found in the
+#  LICENSE file in the root directory of this source tree) and the GPLv2 (found
+#  in the COPYING file in the root directory of this source tree).
+#  You may select, at your option, one of the above-listed licenses
diff --git a/scantext/tests/data/matched_text/spdx/query.txt b/scantext/tests/data/matched_text/spdx/query.txt
new file mode 100644
index 000000000..0ef045154
--- /dev/null
+++ b/scantext/tests/data/matched_text/spdx/query.txt
@@ -0,0 +1,12 @@
+@REM ## @file
+@REM # Makefile
+@REM #
+@REM # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
+@REM # SPDX-License-Identifier: BSD-2-Clause-Patent
+@REM #
+
+@echo off
+setlocal
+set TOOL_ERROR=0
+SET NMAKE_COMMAND=%1
+SHIFT
diff --git a/scantext/tests/data/matched_text/tokenize_matched_text_query.txt b/scantext/tests/data/matched_text/tokenize_matched_text_query.txt
new file mode 100644
index 000000000..f4d5c8efa
--- /dev/null
+++ b/scantext/tests/data/matched_text/tokenize_matched_text_query.txt
@@ -0,0 +1 @@
+the MODULE_LICENSE_GPL+ foobar
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/query b/scantext/tests/data/matched_text/turkish_unicode/query
new file mode 100644
index 000000000..19adb4ef5
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/query
@@ -0,0 +1,20 @@
+# Licensed under the Apache License, Version 2.0
+next_label=İrəli
+
+Some stuff here
+İ license MIT
+
+next_label=İrəli
+
+
+İ license MIT
+
+Some stuff here
+Some more stuff here
+
+# Licensed under the Apache License, Version 2.0
+next_label=İrəli
+
+lİcense MİT
+
+some more
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
new file mode 100644
index 000000000..f0ec0e607
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
@@ -0,0 +1 @@
+İ license MIT
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
new file mode 100644
index 000000000..864a8c3ca
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
@@ -0,0 +1 @@
+license_expression: mit
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
new file mode 100644
index 000000000..7ca4781d2
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
@@ -0,0 +1,2 @@
+# Licensed under the Apache License, Version 2.0
+next_label=İrəli
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
new file mode 100644
index 000000000..a4f80f07b
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
@@ -0,0 +1 @@
+license_expression: apache-2.0
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
new file mode 100644
index 000000000..7b767dbba
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
@@ -0,0 +1 @@
+Licensed under the Apache License, Version 2.0
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
new file mode 100644
index 000000000..1443a0848
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
@@ -0,0 +1 @@
+license_expression: proprietary-license
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
new file mode 100644
index 000000000..d00dc0e28
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
@@ -0,0 +1 @@
+lİcense MİT
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
new file mode 100644
index 000000000..864a8c3ca
--- /dev/null
+++ b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
@@ -0,0 +1 @@
+license_expression: mit
diff --git a/scantext/tests/data/matched_text/unicode_text/main3.js b/scantext/tests/data/matched_text/unicode_text/main3.js
new file mode 100644
index 000000000..f0ec0e607
--- /dev/null
+++ b/scantext/tests/data/matched_text/unicode_text/main3.js
@@ -0,0 +1 @@
+İ license MIT
\ No newline at end of file
diff --git a/scantext/tests/test.py b/scantext/tests/test.py
deleted file mode 100644
index 33eeb08c8..000000000
--- a/scantext/tests/test.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-#
-# http://nexb.com and https://github.com/nexB/scancode.io
-# The ScanCode.io software is licensed under the Apache License version 2.0.
-# Data generated with ScanCode.io is provided as-is without warranties.
-# ScanCode is a trademark of nexB Inc.
-#
-# You may not use this software except in compliance with the License.
-# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software distributed
-# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
-# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
-# ScanCode.io should be considered or used as legal advice. Consult an Attorney
-# for any legal advice.
-#
-# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
-# Visit https://github.com/nexB/scancode.io for support and download.
-
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/scantext/tests/test_match_text.py b/scantext/tests/test_match_text.py
new file mode 100644
index 000000000..975a03c63
--- /dev/null
+++ b/scantext/tests/test_match_text.py
@@ -0,0 +1,1495 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/scancode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import os
+
+from commoncode.testcase import FileBasedTesting
+from licensedcode import cache
+from licensedcode import index
+from licensedcode import models
+from licensedcode.index import LicenseIndex
+from licensedcode.match import LicenseMatch
+from scantext.match_text import Token
+from scantext.match_text import get_full_matched_text
+from scantext.match_text import reportable_tokens
+from licensedcode.match import tokenize_matched_text
+from licensedcode.models import Rule
+from licensedcode.models import load_rules
+from licensedcode.query import Query
+from licensedcode.spans import Span
+
+
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+
+
+class TestCollectLicenseMatchTexts(FileBasedTesting):
+    test_data_dir = TEST_DATA_DIR
+
+    def test_get_full_matched_text_base(self):
+        rule_text = """
+            Copyright [[some copyright]]
+            THIS IS FROM [[THE CODEHAUS]] AND CONTRIBUTORS
+            IN NO EVENT SHALL [[THE CODEHAUS]] OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
+        """
+
+        rule = Rule(stored_text=rule_text, license_expression="test")
+        idx = index.LicenseIndex([rule])
+
+        querys = """
+            foobar 45 . Copyright 2003 (C) James. All Rights Reserved.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC dasdasda .
+        """
+        result = idx.match(query_string=querys)
+        assert len(result) == 1
+        match = result[0]
+
+        # Note that there is a trailing space in that string
+        expected = """Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
+        matched_text = "".join(
+            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
+        )
+        assert matched_text == expected
+
+        expected_nh = """Copyright 2003 (C) James. All Rights Reserved.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
+        matched_text_nh = "".join(
+            get_full_matched_text(
+                match, query_string=querys, idx=idx, _usecache=False, highlight=False
+            )
+        )
+        assert matched_text_nh == expected_nh
+
+        expected_origin_text = """Copyright 2003 (C) James. All Rights Reserved.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
+        origin_matched_text = "".join(
+            get_full_matched_text(
+                match,
+                query_string=querys,
+                idx=idx,
+                highlight_not_matched="{}",
+            )
+        )
+        assert origin_matched_text == expected_origin_text
+
+    def test_get_full_matched_text(self):
+        rule_text = """
+            Copyright [[some copyright]]
+            THIS IS FROM [[THE CODEHAUS]] AND CONTRIBUTORS
+            IN NO EVENT SHALL [[THE CODEHAUS]] OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
+        """
+
+        rule = Rule(stored_text=rule_text, license_expression="test")
+        idx = index.LicenseIndex([rule])
+
+        querys = """
+            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC
+        """
+        result = idx.match(query_string=querys)
+        assert len(result) == 1
+        match = result[0]
+
+        # Note that there is a trailing space in that string
+        expected = """Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
+
+        matched_text = "".join(
+            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
+        )
+        assert matched_text == expected
+
+        # the text is finally rstripped
+        matched_text = match.matched_text(_usecache=False)
+        assert matched_text == expected.rstrip()
+
+        # test again using some HTML with tags
+        # Note that there is a trailing space in that string
+        expected = """Copyright <br>2003</br> (<br>C</br>) <br>James</br>. <br>All</br> <br>Rights</br> <br>Reserved</br>.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE <br>best</br> CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
+        matched_text = "".join(
+            get_full_matched_text(
+                match,
+                query_string=querys,
+                idx=idx,
+                highlight_not_matched="<br>{}</br>",
+                _usecache=False,
+            )
+        )
+        assert matched_text == expected
+
+        # test again using whole_lines
+        expected = """            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
+            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
+            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
+            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC\n"""
+        matched_text = "".join(
+            get_full_matched_text(
+                match,
+                query_string=querys,
+                idx=idx,
+                highlight_not_matched="{}",
+                whole_lines=True,
+            )
+        )
+        assert matched_text == expected
+
+    def test_get_full_matched_text_does_not_munge_underscore(self):
+        rule_text = "MODULE_LICENSE_GPL"
+
+        rule = Rule(stored_text=rule_text, license_expression="test")
+        idx = index.LicenseIndex([rule])
+
+        querys = "MODULE_LICENSE_GPL"
+        result = idx.match(query_string=querys)
+        assert len(result) == 1
+        match = result[0]
+
+        expected = "MODULE_LICENSE_GPL"
+        matched_text = "".join(
+            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
+        )
+        assert matched_text == expected
+
+    def test_get_full_matched_text_does_not_munge_plus(self):
+        rule_text = "MODULE_LICENSE_GPL+ +"
+
+        rule = Rule(stored_text=rule_text, license_expression="test")
+        idx = index.LicenseIndex([rule])
+
+        querys = "MODULE_LICENSE_GPL+ +"
+        result = idx.match(query_string=querys)
+        assert len(result) == 1
+        match = result[0]
+
+        expected = "MODULE_LICENSE_GPL+ +\n"
+        matched_text = "".join(
+            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
+        )
+        assert matched_text == expected
+
+    def test_tokenize_matched_text_does_cache_last_call_from_query_string_and_location(
+        self,
+    ):
+        dictionary = {"module": 0, "license": 1, "gpl+": 2}
+        location = None
+        query_string = "the MODULE_LICENSE_GPL+ foobar"
+        result1 = tokenize_matched_text(location, query_string, dictionary)
+        result2 = tokenize_matched_text(location, query_string, dictionary)
+        assert result2 is result1
+
+        location = self.get_test_loc("matched_text/tokenize_matched_text_query.txt")
+        query_string = None
+        result3 = tokenize_matched_text(location, query_string, dictionary)
+        assert result3 is not result2
+        assert result3 == result2
+
+        result4 = tokenize_matched_text(location, query_string, dictionary)
+        assert result4 is result3
+
+    def test_tokenize_matched_text_does_return_correct_tokens(self):
+        querys = """
+            foobar 45 Copyright 2003 (C) James. All Rights Reserved.  THIS
+            IS FROM THE CODEHAUS AND CONTRIBUTORS
+        """
+        dictionary = dict(
+            this=0, event=1, possibility=2, reserved=3, liable=5, copyright=6
+        )
+        result = tokenize_matched_text(
+            location=None, query_string=querys, dictionary=dictionary
+        )
+        expected = [
+            Token(
+                value="\n",
+                line_num=1,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="            ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="foobar",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="45",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Copyright",
+                line_num=2,
+                pos=0,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="2003",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" (",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="C",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=") ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="James",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=". ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="All",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Rights",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Reserved",
+                line_num=2,
+                pos=1,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value=".  ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="THIS",
+                line_num=2,
+                pos=2,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value="\n",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="            ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="IS",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="FROM",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="THE",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="CODEHAUS",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="AND",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="CONTRIBUTORS",
+                line_num=3,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="\n",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="        \n",
+                line_num=4,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+        ]
+
+        assert result == expected
+
+    def test_tokenize_matched_text_does_not_crash_on_turkish_unicode(self):
+        querys = "İrəli"
+        result = tokenize_matched_text(
+            location=None, query_string=querys, dictionary={}
+        )
+
+        expected = [
+            Token(
+                value="i",
+                line_num=1,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="rəli",
+                line_num=1,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="\n",
+                line_num=1,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+        ]
+        assert result == expected
+
+    def test_tokenize_matched_text_behaves_like_query_tokenizer_on_turkish_unicode(
+        self,
+    ):
+        from licensedcode.tokenize import query_tokenizer
+
+        querys = "İrəli"
+        matched_text_result = tokenize_matched_text(
+            location=None, query_string=querys, dictionary={}
+        )
+        matched_text_result = [t.value for t in matched_text_result]
+        query_tokenizer_result = list(query_tokenizer(querys))
+
+        if matched_text_result[-1] == "\n":
+            matched_text_result = matched_text_result[:-1]
+
+        assert matched_text_result == query_tokenizer_result
+
+    def test_reportable_tokens_filter_tokens_does_not_strip_last_token_value(self):
+        tokens = [
+            Token(
+                value="\n",
+                line_num=1,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="            ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="foobar",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="45",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Copyright",
+                line_num=2,
+                pos=0,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="2003",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" (",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="C",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=") ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="James",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=". ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="All",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Rights",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Reserved",
+                line_num=2,
+                pos=1,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value=".  ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="THIS",
+                line_num=2,
+                pos=2,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value="\n",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="            ",
+                line_num=3,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+        ]
+
+        match_qspan = Span(0, 1)
+        result = list(
+            reportable_tokens(
+                tokens, match_qspan, start_line=1, end_line=2, whole_lines=False
+            )
+        )
+        expected = [
+            Token(
+                value="Copyright",
+                line_num=2,
+                pos=0,
+                is_text=True,
+                is_matched=True,
+                is_known=True,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="2003",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" (",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="C",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=") ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="James",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=". ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="All",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Rights",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Reserved",
+                line_num=2,
+                pos=1,
+                is_text=True,
+                is_matched=True,
+                is_known=True,
+            ),
+            Token(
+                value=".  ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+        ]
+
+        assert result == expected
+
+        # test again with whole lines
+        match_qspan = Span(0, 1)
+        result = list(
+            reportable_tokens(
+                tokens, match_qspan, start_line=1, end_line=2, whole_lines=True
+            )
+        )
+        expected = [
+            Token(
+                value="\n",
+                line_num=1,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="            ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="foobar",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="45",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Copyright",
+                line_num=2,
+                pos=0,
+                is_text=True,
+                is_matched=True,
+                is_known=True,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="2003",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" (",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="C",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=") ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="James",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=". ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="All",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Rights",
+                line_num=2,
+                pos=-1,
+                is_text=True,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value=" ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="Reserved",
+                line_num=2,
+                pos=1,
+                is_text=True,
+                is_matched=True,
+                is_known=True,
+            ),
+            Token(
+                value=".  ",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+            Token(
+                value="THIS",
+                line_num=2,
+                pos=2,
+                is_text=True,
+                is_matched=False,
+                is_known=True,
+            ),
+            Token(
+                value="\n",
+                line_num=2,
+                pos=-1,
+                is_text=False,
+                is_matched=False,
+                is_known=False,
+            ),
+        ]
+
+        assert result == expected
+
+    def test_matched_text_is_collected_correctly_end2end(self):
+        rules_data_dir = self.get_test_loc("matched_text/index/rules")
+        query_location = self.get_test_loc("matched_text/query.txt")
+        rules = models.load_rules(rules_data_dir)
+        idx = LicenseIndex(rules)
+
+        results = [
+            match.matched_text(_usecache=False)
+            for match in idx.match(location=query_location)
+        ]
+        expected = [
+            "This source code is licensed under both the Apache 2.0 license "
+            "(found in the\n#  LICENSE",
+            "This source code is licensed under [both] [the] [Apache] [2].[0] license "
+            "(found in the\n#  LICENSE file in the root directory of this source tree)",
+            "GPLv2 (",
+        ]
+        assert results == expected
+
+    def check_matched_texts(self, test_loc, expected_texts, whole_lines=True):
+        idx = cache.get_index()
+        test_loc = self.get_test_loc(test_loc)
+        matches = idx.match(location=test_loc)
+        matched_texts = [
+            m.matched_text(whole_lines=whole_lines, highlight=False, _usecache=False)
+            for m in matches
+        ]
+        assert matched_texts == expected_texts
+
+    def test_matched_text_is_collected_correctly_end2end_for_spdx_match_whole_lines(
+        self,
+    ):
+        self.check_matched_texts(
+            test_loc="matched_text/spdx/query.txt",
+            expected_texts=["@REM # SPDX-License-Identifier: BSD-2-Clause-Patent"],
+            whole_lines=True,
+        )
+
+    def test_matched_text_is_collected_correctly_end2end_for_spdx_match_plain(self):
+        self.check_matched_texts(
+            test_loc="matched_text/spdx/query.txt",
+            expected_texts=["SPDX-License-Identifier: BSD-2-Clause-Patent"],
+            whole_lines=False,
+        )
+
+    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_query(
+        self,
+    ):
+        idx = cache.get_index()
+        querys_with_diacritic_unicode = "İ license MIT"
+        result = idx.match(query_string=querys_with_diacritic_unicode)
+        assert len(result) == 1
+        match = result[0]
+        expected = "license MIT"
+        matched_text = match.matched_text(
+            _usecache=False,
+        )
+        assert matched_text == expected
+
+    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_file(self):
+        idx = cache.get_index()
+        file_with_diacritic_unicode_location = self.get_test_loc(
+            "matched_text/unicode_text/main3.js"
+        )
+        result = idx.match(location=file_with_diacritic_unicode_location)
+        assert len(result) == 1
+        match = result[0]
+        expected = "license MIT"
+        matched_text = match.matched_text(_usecache=False)
+        assert matched_text == expected
+
+    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_query_whole_lines(
+        self,
+    ):
+        idx = cache.get_index()
+        querys_with_diacritic_unicode = "İ license MIT"
+        result = idx.match(query_string=querys_with_diacritic_unicode)
+        assert len(result) == 1
+        match = result[0]
+        expected = "[İ] license MIT"
+        matched_text = match.matched_text(_usecache=False, whole_lines=True)
+        assert matched_text == expected
+
+    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_with_diacritic_in_rules(
+        self,
+    ):
+        rule_dir = self.get_test_loc("matched_text/turkish_unicode/rules")
+        idx = index.LicenseIndex(load_rules(rule_dir))
+        query_loc = self.get_test_loc("matched_text/turkish_unicode/query")
+        matches = idx.match(location=query_loc)
+        matched_texts = [
+            m.matched_text(whole_lines=False, highlight=False, _usecache=False)
+            for m in matches
+        ]
+
+        expected = [
+            "Licensed under the Apache License, Version 2.0\r\nnext_label=irəli",
+            "İ license MIT",
+            "İ license MIT",
+            "Licensed under the Apache License, Version 2.0\r\nnext_label=irəli",
+            "lİcense mit",
+        ]
+
+        assert matched_texts == expected
+
+    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_and_full_index(
+        self,
+    ):
+        expected = [
+            "Licensed under the Apache License, Version 2.0",
+            "license MIT",
+            "license MIT",
+            "Licensed under the Apache License, Version 2.0",
+        ]
+
+        self.check_matched_texts(
+            test_loc="matched_text/turkish_unicode/query",
+            expected_texts=expected,
+            whole_lines=False,
+        )
+
+    def test_matched_text_does_not_ignores_whole_lines_in_binary_with_small_index(self):
+        rule_dir = self.get_test_loc("matched_text/binary_text/rules")
+        idx = index.LicenseIndex(load_rules(rule_dir))
+        query_loc = self.get_test_loc("matched_text/binary_text/gosu")
+        matches = idx.match(location=query_loc)
+        matched_texts = [
+            m.matched_text(whole_lines=True, highlight=False, _usecache=False)
+            for m in matches
+        ]
+
+        expected = [
+            "{{ .Self }} license: GPL-3 (full text at https://github.com/tianon/gosu)"
+        ]
+
+        assert matched_texts == expected
+
+    def test_matched_text_does_not_ignores_whole_lines_in_binary_against_full_index(
+        self,
+    ):
+        expected = [
+            "{{ .Self }} license: GPL-3 (full text at https://github.com/tianon/gosu)"
+        ]
+        self.check_matched_texts(
+            test_loc="matched_text/binary_text/gosu",
+            expected_texts=expected,
+            whole_lines=True,
+        )
+
+    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_windows_whole_lines(
+        self,
+    ):
+        expected_texts = [
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "%sconfiguration: --enable-gpl --enable-version3 --enable-dxva2 "
+            "--enable-libmfx --enable-nvenc --enable-avisynth --enable-bzlib "
+            "--enable-fontconfig --enable-frei0r --enable-gnutls --enable-iconv "
+            "--enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca "
+            "--enable-libfreetype --enable-libgme --enable-libgsm --enable-libilbc "
+            "--enable-libmodplug --enable-libmp3lame --enable-libopencore-amrnb "
+            "--enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg "
+            "--enable-libopus --enable-librtmp --enable-libsnappy --enable-libsoxr "
+            "--enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab "
+            "--enable-libvo-amrwbenc --enable-libvorbis --enable-libvpx "
+            "--enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 "
+            "--enable-libxavs --enable-libxvid --enable-libzimg --enable-lzma "
+            "--enable-decklink --enable-zlib",
+            "%s is free software; you can redistribute it and/or modify\n"
+            "it under the terms of the GNU General Public License as published by\n"
+            "the Free Software Foundation; either version 3 of the License, or\n"
+            "(at your option) any later version.\n"
+            "%s is distributed in the hope that it will be useful,\n"
+            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+            "GNU General Public License for more details.\n"
+            "You should have received a copy of the GNU General Public License\n"
+            "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n"
+            "File formats:\n"
+            "D. = Demuxing supported\n"
+            ".E = Muxing supported\n"
+            "%s%s %-15s %s\n"
+            "Devices:\n"
+            "Codecs:\n"
+            "D..... = Decoding supported\n"
+            ".E.... = Encoding supported\n"
+            "..V... = Video codec\n"
+            "No option name near '%s'\n"
+            "Unable to parse '%s': %s\n"
+            "Setting '%s' to value '%s'\n"
+            "Option '%s' not found\n"
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libavfilter license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libavformat license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libavcodec license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libpostproc license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libswresample license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libswscale license: GPL version 3 or later",
+            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
+            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
+            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
+            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
+            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
+            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
+            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
+            "--enable-lzma --enable-decklink --enable-zlib",
+            "libavutil license: GPL version 3 or later",
+            "This software is derived from the GNU GPL XviD codec (1.3.0).",
+        ]
+
+        self.check_matched_texts(
+            test_loc="matched_text/ffmpeg/ffmpeg.exe",
+            expected_texts=expected_texts,
+            whole_lines=True,
+        )
+
+    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_windows_not_whole_lines(
+        self,
+    ):
+        expected_texts = [
+            "enable-gpl --enable-version3 --",
+            "enable-gpl --enable-version3 --",
+            "is free software; you can redistribute it and/or modify\n"
+            "it under the terms of the GNU General Public License as published by\n"
+            "the Free Software Foundation; either version 3 of the License, or\n"
+            "(at your option) any later version.\n"
+            "%s is distributed in the hope that it will be useful,\n"
+            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+            "GNU General Public License for more details.\n"
+            "You should have received a copy of the GNU General Public License\n"
+            "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n"
+            "File formats:\n"
+            "D. = Demuxing supported\n"
+            ".E = Muxing supported\n"
+            "%s%s %-15s %s\n"
+            "Devices:\n"
+            "Codecs:\n"
+            "D..... = Decoding supported\n"
+            ".E.... = Encoding supported\n"
+            "..V... = Video codec\n"
+            "No option name near '%s'\n"
+            "Unable to parse '%s': %s\n"
+            "Setting '%s' to value '%s'\n"
+            "Option '%s' not found\n"
+            "--enable-gpl --",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "enable-gpl --enable-version3 --",
+            "license: GPL version 3 or later",
+            "This software is derived from the GNU GPL XviD codec (",
+        ]
+
+        self.check_matched_texts(
+            test_loc="matched_text/ffmpeg/ffmpeg.exe",
+            expected_texts=expected_texts,
+            whole_lines=False,
+        )
+
+    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_elf_whole_lines(self):
+        expected_texts = [
+            "--prefix=/usr --extra-version=0ubuntu0.1 --build-suffix=-ffmpeg "
+            "--toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu "
+            "--incdir=/usr/include/x86_64-linux-gnu --cc=cc --cxx=g++ --enable-gpl "
+            "--enable-shared --disable-stripping --disable-decoder=libopenjpeg "
+            "--disable-decoder=libschroedinger --enable-avresample --enable-avisynth "
+            "--enable-gnutls --enable-ladspa --enable-libass --enable-libbluray "
+            "--enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite "
+            "--enable-libfontconfig --enable-libfreetype --enable-libfribidi "
+            "--enable-libgme --enable-libgsm --enable-libmodplug --enable-libmp3lame "
+            "--enable-libopenjpeg --enable-libopus --enable-libpulse --enable-librtmp "
+            "--enable-libschroedinger --enable-libshine --enable-libsnappy "
+            "--enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora "
+            "--enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack "
+            "--enable-libwebp --enable-libx265 --enable-libxvid --enable-libzvbi "
+            "--enable-openal --enable-opengl --enable-x11grab --enable-libdc1394 "
+            "--enable-libiec61883 --enable-libzmq --enable-frei0r --enable-libx264 "
+            "--enable-libopencv",
+            "%sconfiguration: --prefix=/usr --extra-version=0ubuntu0.1 "
+            "--build-suffix=-ffmpeg --toolchain=hardened "
+            "--libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu "
+            "--cc=cc --cxx=g++ --enable-gpl --enable-shared --disable-stripping "
+            "--disable-decoder=libopenjpeg --disable-decoder=libschroedinger "
+            "--enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa "
+            "--enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca "
+            "--enable-libcdio --enable-libflite --enable-libfontconfig "
+            "--enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm "
+            "--enable-libmodplug --enable-libmp3lame --enable-libopenjpeg "
+            "--enable-libopus --enable-libpulse --enable-librtmp --enable-libschroedinger "
+            "--enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex "
+            "--enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis "
+            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 "
+            "--enable-libxvid --enable-libzvbi --enable-openal --enable-opengl "
+            "--enable-x11grab --enable-libdc1394 --enable-libiec61883 --enable-libzmq "
+            "--enable-frei0r --enable-libx264 --enable-libopencv",
+            "%s is free software; you can redistribute it and/or modify\n"
+            "it under the terms of the GNU General Public License as published by\n"
+            "the Free Software Foundation; either version 2 of the License, or\n"
+            "(at your option) any later version.\n"
+            "%s is distributed in the hope that it will be useful,\n"
+            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+            "GNU General Public License for more details.\n"
+            "You should have received a copy of the GNU General Public License\n"
+            "along with %s; if not, write to the Free Software\n"
+            "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA",
+        ]
+
+        self.check_matched_texts(
+            test_loc="matched_text/ffmpeg/ffmpeg",
+            expected_texts=expected_texts,
+            whole_lines=True,
+        )
+
+    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_static_whole_lines(
+        self,
+    ):
+        expected_texts = ["libswresample license: LGPL version 2.1 or later"]
+        self.check_matched_texts(
+            test_loc="matched_text/ffmpeg/libavsample.lib",
+            expected_texts=expected_texts,
+            whole_lines=True,
+        )

From 76e8a0383933bf9da464fbc2347fb60df1d3071f Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Tue, 9 Aug 2022 12:32:37 +0200
Subject: [PATCH 30/59] Make scancode match text test pass

* Some adjustments were needed to ensure we could run these copied tests
  correctly.

* See also this issue, which requires installing SCTK locally in SCIO
  using "pip install --editable <path to your scancode checkout>"
  and then running "scancode --reindex-licenses" to get proper paths.
  https://github.com/nexB/scancode-toolkit/issues/3044

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 scantext/match_text.py            |  3 ---
 scantext/tests/test_match_text.py | 25 ++++++++++---------------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index e43a60ca2..2c46cacfb 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -7,11 +7,8 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
 
-from enum import IntEnum
-from itertools import groupby
 
 import attr
-from attr import validators
 from licensedcode import query
 from licensedcode.spans import Span
 from licensedcode.stopwords import STOPWORDS
diff --git a/scantext/tests/test_match_text.py b/scantext/tests/test_match_text.py
index 975a03c63..6852bde77 100644
--- a/scantext/tests/test_match_text.py
+++ b/scantext/tests/test_match_text.py
@@ -13,16 +13,11 @@
 from licensedcode import cache
 from licensedcode import index
 from licensedcode import models
-from licensedcode.index import LicenseIndex
-from licensedcode.match import LicenseMatch
-from scantext.match_text import Token
+from licensedcode.spans import Span
 from scantext.match_text import get_full_matched_text
 from scantext.match_text import reportable_tokens
-from licensedcode.match import tokenize_matched_text
-from licensedcode.models import Rule
-from licensedcode.models import load_rules
-from licensedcode.query import Query
-from licensedcode.spans import Span
+from scantext.match_text import Token
+from scantext.match_text import tokenize_matched_text
 
 
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
@@ -39,7 +34,7 @@ def test_get_full_matched_text_base(self):
             EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
         """
 
-        rule = Rule(stored_text=rule_text, license_expression="test")
+        rule = models.Rule(stored_text=rule_text, license_expression="test")
         idx = index.LicenseIndex([rule])
 
         querys = """
@@ -95,7 +90,7 @@ def test_get_full_matched_text(self):
             EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
         """
 
-        rule = Rule(stored_text=rule_text, license_expression="test")
+        rule = models.Rule(stored_text=rule_text, license_expression="test")
         idx = index.LicenseIndex([rule])
 
         querys = """
@@ -159,7 +154,7 @@ def test_get_full_matched_text(self):
     def test_get_full_matched_text_does_not_munge_underscore(self):
         rule_text = "MODULE_LICENSE_GPL"
 
-        rule = Rule(stored_text=rule_text, license_expression="test")
+        rule = models.Rule(stored_text=rule_text, license_expression="test")
         idx = index.LicenseIndex([rule])
 
         querys = "MODULE_LICENSE_GPL"
@@ -176,7 +171,7 @@ def test_get_full_matched_text_does_not_munge_underscore(self):
     def test_get_full_matched_text_does_not_munge_plus(self):
         rule_text = "MODULE_LICENSE_GPL+ +"
 
-        rule = Rule(stored_text=rule_text, license_expression="test")
+        rule = models.Rule(stored_text=rule_text, license_expression="test")
         idx = index.LicenseIndex([rule])
 
         querys = "MODULE_LICENSE_GPL+ +"
@@ -1067,7 +1062,7 @@ def test_matched_text_is_collected_correctly_end2end(self):
         rules_data_dir = self.get_test_loc("matched_text/index/rules")
         query_location = self.get_test_loc("matched_text/query.txt")
         rules = models.load_rules(rules_data_dir)
-        idx = LicenseIndex(rules)
+        idx = index.LicenseIndex(rules)
 
         results = [
             match.matched_text(_usecache=False)
@@ -1150,7 +1145,7 @@ def test_matched_text_is_not_truncated_with_unicode_diacritic_input_with_diacrit
         self,
     ):
         rule_dir = self.get_test_loc("matched_text/turkish_unicode/rules")
-        idx = index.LicenseIndex(load_rules(rule_dir))
+        idx = index.LicenseIndex(models.load_rules(rule_dir))
         query_loc = self.get_test_loc("matched_text/turkish_unicode/query")
         matches = idx.match(location=query_loc)
         matched_texts = [
@@ -1186,7 +1181,7 @@ def test_matched_text_is_not_truncated_with_unicode_diacritic_input_and_full_ind
 
     def test_matched_text_does_not_ignores_whole_lines_in_binary_with_small_index(self):
         rule_dir = self.get_test_loc("matched_text/binary_text/rules")
-        idx = index.LicenseIndex(load_rules(rule_dir))
+        idx = index.LicenseIndex(models.load_rules(rule_dir))
         query_loc = self.get_test_loc("matched_text/binary_text/gosu")
         matches = idx.match(location=query_loc)
         matched_texts = [

From 8727cb7b81c082fc670744535137fb255c81c07b Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Tue, 9 Aug 2022 12:57:10 +0200
Subject: [PATCH 31/59] Add list of matches to Token

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 scantext/match_text.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 2c46cacfb..f621872b0 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -7,7 +7,6 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
 
-
 import attr
 from licensedcode import query
 from licensedcode.spans import Span
@@ -65,21 +64,44 @@ class Token(object):
     """
     Used to represent a token in collected query-side matched texts and SPDX
     identifiers.
+
+    ``matches`` is a list of LicenseMatch to accommodate overlapping matches.
+    For example, say we have these two matched text portions:
+    QueryText: this is licensed under GPL or MIT
+    Match1:    this is licensed under GPL
+    Match2:            licensed under GPL or MIT
+
+    Each Token would be assigned one or more LicenseMatch:
+        this:         Match1            : yellow
+        is:           Match1            : yellow
+        licensed:     Match1, Match2    : orange (mixing yellow and pink colors)
+        under:        Match1, Match2    : orange (mixing yellow and pink colors)
+        GPL:          Match1, Match2    : orange (mixing yellow and pink colors)
+        or:           Match2            : pink
+        MIT:          Match2            : pink
     """
 
     # original text value for this token.
     value = attr.ib()
+
     # line number, one-based
     line_num = attr.ib()
+
     # absolute position for known tokens, zero-based. -1 for unknown tokens
     pos = attr.ib(default=-1)
+
     # True if text/alpha False if this is punctuation or spaces
     is_text = attr.ib(default=False)
+
     # True if part of a match
     is_matched = attr.ib(default=False)
+
     # True if this is a known token
     is_known = attr.ib(default=False)
 
+    # List of LicenseMatch that match this token
+    matches = attr.ib(attr.Factory(list))
+
 
 def tokenize_matched_text(
     location,

From 3f7b64081294174510976f5025b9fbafb472eb1d Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 12 Aug 2022 15:29:17 +0530
Subject: [PATCH 32/59] Add all detected values into the table #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       | 59 ++++++++++++++--
 .../includes/license_summary_detail.html      | 69 +++++++++++++++----
 scantext/views.py                             |  8 ++-
 3 files changed, 115 insertions(+), 21 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index 88f63587b..f099bbbd5 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -39,29 +39,74 @@
                         <div class="table-container mt-3">
                             <table class="table is-striped is-narrow is-hoverable is-fullwidth">
                                 <tr>
-                                    <td><strong>Name</strong></td>
-                                    <td>{{ license.name }}</td>
+                                    <td><strong>Key</strong></td>
+                                    <td>{{ license.key }}</td>
                                 </tr>
                                 <tr>
                                     <td><strong>Score</strong></td>
                                     <td>{{ license.score }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Owner</strong></td>
-                                    <td>{{ license.owner }}</td>
+                                    <td><strong>Name</strong></td>
+                                    <td>{{ license.name }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Short Name</strong></td>
+                                    <td>{{ license.short_name }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Lines</strong></td>
+                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                                 </tr>
                                 <tr>
                                     <td><strong>Category</strong></td>
                                     <td>{{ license.category }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>SPDX Key</strong></td>
+                                    <td><strong>Reference</strong></td>
+                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Exection</strong></td>
+                                    <td>{{ license.is_exception }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Is Unknown</strong></td>
+                                    <td>{{ license.is_unknown }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Owner</strong></td>
+                                    <td>{{ license.owner }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Homepage</strong></td>
+                                    <td>{{ license.homepage_url }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Text URL</strong></td>
+                                    <td>{{ license.text_url }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Scancode Text URL</strong></td>
+                                    <td>{{ license.scancode_text_url }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Scancode Data URL</strong></td>
+                                    <td>{{ license.scancode_data_url }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>SPDX License Key</strong></td>
                                     <td>{{ license.spdx_license_key }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Reference</strong></td>
-                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                    <td><strong>SPDX URL</strong></td>
+                                    <td>{{ license.spdx_url }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Lines</strong></td>
+                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                                 </tr>
+
                             </table>
                         </div>
                     </div>
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 25990c3a5..0f1d95d75 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -9,40 +9,85 @@
     </div>
     <div class="column is-one-third">
         <article class="panel is-info">
-            <div class="panel-heading is-flex is-justify-content-space-between is-align-items-center py-0 px-3">
+            <div class="panel-heading py-2 px-4">
                 <p>Detected Licenses</p>
-                <div>
-                    <button class="button px-2 previous-btn is-info"><i class="fas fa-arrow-up"></i></button>
-                    <button class="button px-2 next-btn is-info"><i class="fas fa-arrow-down"></i></button>
-                </div>
             </div>
             {% for license in detected_licenses.licenses %}
-            <table class="license-table table is-fullwidth">
+            <p class="panel-tabs">
+                {% for rule in license.rules %}
+                <a class="">{{ rule }}</a>
+                {% endfor %}
+            </p>
+            <table class="license-table table is-striped is-hoverable mx-1">
                 <tbody>
                     <tr>
-                        <td><strong>Name</strong></td>
-                        <td>{{ license.name }}</td>
+                    <td><strong>Matched Rule</strong></td>
+                    <td>{{ license.rule }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Key</strong></td>
+                        <td>{{ license.key }}</td>
                     </tr>
                     <tr>
                         <td><strong>Score</strong></td>
                         <td>{{ license.score }}</td>
                     </tr>
                     <tr>
-                        <td><strong>Owner</strong></td>
-                        <td>{{ license.owner }}</td>
+                        <td><strong>Name</strong></td>
+                        <td>{{ license.name }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Short Name</strong></td>
+                        <td>{{ license.short_name }}</td>
                     </tr>
                     <tr>
                         <td><strong>Category</strong></td>
                         <td>{{ license.category }}</td>
                     </tr>
                     <tr>
-                        <td><strong>SPDX Key</strong></td>
-                        <td>{{ license.spdx_license_key }}</td>
+                        <td><strong>Lines</strong></td>
+                        <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                     </tr>
                     <tr>
                         <td><strong>Reference</strong></td>
                         <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
                     </tr>
+                    <tr>
+                        <td><strong>Exection</strong></td>
+                        <td>{{ license.is_exception }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Is Unknown</strong></td>
+                        <td>{{ license.is_unknown }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Owner</strong></td>
+                        <td>{{ license.owner }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Homepage</strong></td>
+                        <td>{{ license.homepage_url }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Text URL</strong></td>
+                        <td>{{ license.text_url }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Scancode Text URL</strong></td>
+                        <td>{{ license.scancode_text_url }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>Scancode Data URL</strong></td>
+                        <td>{{ license.scancode_data_url }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>SPDX License Key</strong></td>
+                        <td>{{ license.spdx_license_key }}</td>
+                    </tr>
+                    <tr>
+                        <td><strong>SPDX URL</strong></td>
+                        <td>{{ license.spdx_url }}</td>
+                    </tr>
                 </tbody>
             </table>
             {% endfor %}
diff --git a/scantext/views.py b/scantext/views.py
index 5dcb703d2..d4c1dbf55 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -156,6 +156,7 @@ def get_licenses(
     detected_licenses = []
     detected_expressions = []
 
+    # gets matches from a license file
     matches = idx.match(
         location=location,
         min_score=0,
@@ -167,6 +168,7 @@ def get_licenses(
     qspans = []
     match = None
     complete_text_in_array = []
+    # run through a list of matches
     for match in matches:
         qspans.append(match.qspan)
 
@@ -244,6 +246,8 @@ def _licenses_data_from_match(
         result["reference_url"] = license_url_template.format(lic.key)
         result["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
         result["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
+        result["rule"] = match.rule.license_expression
+        result["rules"] = match.rule.license_keys()
 
         spdx_key = lic.spdx_license_key
         result["spdx_license_key"] = spdx_key
@@ -316,11 +320,11 @@ def get_highlighted_lines(
     }
 
     .not-matched {
-        color: #ac0000;
+        background-color: #ff0000;
     }
 
     .matched {
-        color: #00ac00;
+        background-color: #00ff00;
     }
     </style>
     <div class="license-match">

From 658df1f8cba474748e06c8050eda616df01734a9 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 17 Aug 2022 12:35:17 +0530
Subject: [PATCH 33/59] Add details page to view matched license details #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       | 49 ++------------
 .../includes/license_summary_chart.html       | 10 +++
 .../includes/license_summary_detail.html      | 58 ++++-------------
 .../license_summary_more_details.html         | 65 +++++++++++++++++++
 .../templates/scantext/license_summary.html   | 31 +++++++++
 5 files changed, 121 insertions(+), 92 deletions(-)
 create mode 100644 scantext/templates/scantext/includes/license_summary_chart.html
 create mode 100644 scantext/templates/scantext/includes/license_summary_more_details.html

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index f099bbbd5..f80c3450e 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -50,63 +50,22 @@
                                     <td><strong>Name</strong></td>
                                     <td>{{ license.name }}</td>
                                 </tr>
-                                <tr>
-                                    <td><strong>Short Name</strong></td>
-                                    <td>{{ license.short_name }}</td>
-                                </tr>
                                 <tr>
                                     <td><strong>Lines</strong></td>
                                     <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                                 </tr>
-                                <tr>
-                                    <td><strong>Category</strong></td>
-                                    <td>{{ license.category }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Reference</strong></td>
-                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Exection</strong></td>
-                                    <td>{{ license.is_exception }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Is Unknown</strong></td>
-                                    <td>{{ license.is_unknown }}</td>
-                                </tr>
                                 <tr>
                                     <td><strong>Owner</strong></td>
                                     <td>{{ license.owner }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Homepage</strong></td>
-                                    <td>{{ license.homepage_url }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Text URL</strong></td>
-                                    <td>{{ license.text_url }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Scancode Text URL</strong></td>
-                                    <td>{{ license.scancode_text_url }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Scancode Data URL</strong></td>
-                                    <td>{{ license.scancode_data_url }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>SPDX License Key</strong></td>
-                                    <td>{{ license.spdx_license_key }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>SPDX URL</strong></td>
-                                    <td>{{ license.spdx_url }}</td>
+                                    <td><strong>Category</strong></td>
+                                    <td>{{ license.category }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Lines</strong></td>
-                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                                    <td><strong>Reference</strong></td>
+                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
                                 </tr>
-
                             </table>
                         </div>
                     </div>
diff --git a/scantext/templates/scantext/includes/license_summary_chart.html b/scantext/templates/scantext/includes/license_summary_chart.html
new file mode 100644
index 000000000..13ddf0a54
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_summary_chart.html
@@ -0,0 +1,10 @@
+<div class="mb-5 mx-1">
+   	<div>
+       	<p class="title is-5">Detected License Expressions</p>
+		<div id="pieChart"></div>
+	</div>
+	<div>
+       	<p class="title is-5">License Expressions Scores</p>
+		<div id="barChart_1"></div>
+	</div>
+</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 0f1d95d75..d25e50542 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,11 +1,15 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
         <p class="title is-5">Input License Text</p>
-        {% for text in detected_licenses.complete_text_in_array %}
+<!--         {% for text in detected_licenses.complete_text_in_array %}
         <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
             {{ text|safe }}
         </section>
-        {% endfor %}
+        {% endfor %} -->
+        <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
+            {{ detected_licenses.complete_text|safe }}
+        </section>
+
     </div>
     <div class="column is-one-third">
         <article class="panel is-info">
@@ -20,10 +24,6 @@
             </p>
             <table class="license-table table is-striped is-hoverable mx-1">
                 <tbody>
-                    <tr>
-                    <td><strong>Matched Rule</strong></td>
-                    <td>{{ license.rule }}</td>
-                    </tr>
                     <tr>
                         <td><strong>Key</strong></td>
                         <td>{{ license.key }}</td>
@@ -36,57 +36,21 @@
                         <td><strong>Name</strong></td>
                         <td>{{ license.name }}</td>
                     </tr>
-                    <tr>
-                        <td><strong>Short Name</strong></td>
-                        <td>{{ license.short_name }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Category</strong></td>
-                        <td>{{ license.category }}</td>
-                    </tr>
                     <tr>
                         <td><strong>Lines</strong></td>
-                        <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Reference</strong></td>
-                        <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
-                    </tr>
-                    <tr>
-                        <td><strong>Exection</strong></td>
-                        <td>{{ license.is_exception }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Is Unknown</strong></td>
-                        <td>{{ license.is_unknown }}</td>
+                        <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                     </tr>
                     <tr>
                         <td><strong>Owner</strong></td>
                         <td>{{ license.owner }}</td>
                     </tr>
                     <tr>
-                        <td><strong>Homepage</strong></td>
-                        <td>{{ license.homepage_url }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Text URL</strong></td>
-                        <td>{{ license.text_url }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Scancode Text URL</strong></td>
-                        <td>{{ license.scancode_text_url }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Scancode Data URL</strong></td>
-                        <td>{{ license.scancode_data_url }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>SPDX License Key</strong></td>
-                        <td>{{ license.spdx_license_key }}</td>
+                        <td><strong>Category</strong></td>
+                        <td>{{ license.category }}</td>
                     </tr>
                     <tr>
-                        <td><strong>SPDX URL</strong></td>
-                        <td>{{ license.spdx_url }}</td>
+                        <td><strong>Reference</strong></td>
+                        <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
                     </tr>
                 </tbody>
             </table>
diff --git a/scantext/templates/scantext/includes/license_summary_more_details.html b/scantext/templates/scantext/includes/license_summary_more_details.html
new file mode 100644
index 000000000..1ee034635
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_summary_more_details.html
@@ -0,0 +1,65 @@
+<div class="mb-5 mx-1">
+    <div class="column is-two-third">
+        <p class="title is-5">Detected Licenses</p>
+    </div>
+    {% for license in detected_licenses.licenses %}
+    <article class="panel is-info">
+        <div class="panel-heading py-2 px-4">
+            <p>{{ license.name }}</p>
+        </div>
+        <table class="license-table table is-striped is-bordered is-hoverable">
+            <tbody>
+                <tr>
+                    <td><strong>Rule</strong></td>
+                    <td>{{ license.rule }}</td>
+                    <td><strong>Key</strong></td>
+                    <td>{{ license.key }}</td>
+                    <td><strong>Score</strong></td>
+                    <td>{{ license.score }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Name</strong></td>
+                    <td>{{ license.name }}</td>
+                    <td><strong>Short Name</strong></td>
+                    <td>{{ license.short_name }}</td>
+                    <td><strong>Category</strong></td>
+                    <td>{{ license.category }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Line(s)</strong></td>
+                    <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                    <td><strong>Reference</strong></td>
+                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                    <td><strong>Execption</strong></td>
+                    <td>{{ license.is_exception }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Is Unknown</strong></td>
+                    <td>{{ license.is_unknown }}</td>
+                    <td><strong>Owner</strong></td>
+                    <td>{{ license.owner }}</td>
+                    <td><strong>Homepage</strong></td>
+                    <td>{{ license.homepage_url }}</td>
+                </tr>
+                <tr>
+                    <td><strong>Text URL</strong></td>
+                    <td>{{ license.text_url }}</td>
+                    <td><strong>Scancode Text URL</strong></td>
+                    <td>{{ license.scancode_text_url }}</td>
+                    <td><strong>Scancode Data URL</strong></td>
+                    <td>{{ license.scancode_data_url }}</td>
+                </tr>
+                <tr>
+                    <td><strong>SPDX License Key</strong></td>
+                    <td>{{ license.spdx_license_key }}</td>
+                    <td><strong>SPDX URL</strong></td>
+                    <td>{{ license.spdx_url }}</td>
+                    <td><strong>Matched Rule</strong></td>
+                    <td>{{ license.matched_rule }}</td>
+                </tr>
+            </tbody>
+        </table>
+    </article>
+    {% endfor %}
+
+</div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index f95a7a96f..a6fc98e5d 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -18,6 +18,7 @@ <h1 class="title is-4">License Detection Summary</h1>
         <ul class="nav">
             <li><a>Summary In Ace</a></li>
             <li class="is-active"><a>Summary with Highlighted Text</a></li>
+            <li><a>License Details</a></li>
         </ul>
     </div>
 
@@ -30,12 +31,18 @@ <h1 class="title is-4">License Detection Summary</h1>
 
     <section class="tab-container">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
+        {% include 'scantext/includes/license_summary_chart.html' with detected_licenses=detected_licenses %}
+    </section>
+
+    <section class="tab-container is-hidden">
+        {% include 'scantext/includes/license_summary_more_details.html' with detected_licenses=detected_licenses %}
     </section>
 </div>
 {% endblock %}
 
 {% block scripts %}
 <script src="{% static 'ace-1.4.12.min.js' %}" crossorigin="anonymous"></script>
+<script src="{% static 'billboard-3.0.1.pkgd.min.js' %}" crossorigin="anonymous"></script>
 <script type="text/javascript">
     let editor = ace.edit("editor", {
         mode: "ace/mode/text",
@@ -128,4 +135,28 @@ <h1 class="title is-4">License Detection Summary</h1>
         }
     }
 </script>
+{{ detected_licenses|json_script:"detected_licenses" }}
+<script type="text/javascript">
+    var chart = bb.generate({
+      data: {
+        columns: {{ detected_licenses.license_expressions_scores|safe }},
+        type: "donut",
+      },
+      bindto: "#pieChart"
+    });
+
+    var chart = bb.generate({
+      data: {
+        columns: {{ detected_licenses.license_expressions_scores|safe }},
+        type: "bar", // for ESM specify as: bar()
+      },
+      bar: {
+        width: {
+          ratio: 0.5
+        }
+      },
+      bindto: "#barChart_1"
+    });
+
+</script>
 {% endblock %}

From 8edf20776d9f8d66cc69f1aec86e93ae076def67 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 17 Aug 2022 12:36:30 +0530
Subject: [PATCH 34/59] Add a mini licenses file to test for development #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/tests/data/licenses | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 scantext/tests/data/licenses

diff --git a/scantext/tests/data/licenses b/scantext/tests/data/licenses
new file mode 100644
index 000000000..5890f4498
--- /dev/null
+++ b/scantext/tests/data/licenses
@@ -0,0 +1,6 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+[This is the first released version of the Lesser GPL
+

From 787133a96a722514ba8fb15fa26cfb4f2a0183f6 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 17 Aug 2022 12:39:16 +0530
Subject: [PATCH 35/59] Move import of Token to top #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/tests/test_match_text.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scantext/tests/test_match_text.py b/scantext/tests/test_match_text.py
index 6852bde77..2f46273fd 100644
--- a/scantext/tests/test_match_text.py
+++ b/scantext/tests/test_match_text.py
@@ -14,12 +14,12 @@
 from licensedcode import index
 from licensedcode import models
 from licensedcode.spans import Span
+
+from scantext.match_text import Token
 from scantext.match_text import get_full_matched_text
 from scantext.match_text import reportable_tokens
-from scantext.match_text import Token
 from scantext.match_text import tokenize_matched_text
 
-
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
 
 

From 4a08ea9b8865e4e90e07602090f5b3f0384bf2a9 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 17 Aug 2022 12:53:01 +0530
Subject: [PATCH 36/59] Add `license_chart_data` to render charts #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scantext/views.py b/scantext/views.py
index d4c1dbf55..3f736b484 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -155,6 +155,7 @@ def get_licenses(
 
     detected_licenses = []
     detected_expressions = []
+    detected_expressions_with_scores = []
 
     # gets matches from a license file
     matches = idx.match(
@@ -173,7 +174,7 @@ def get_licenses(
         qspans.append(match.qspan)
 
         detected_expressions.append(match.rule.license_expression)
-
+        detected_expressions_with_scores.append([match.rule.license_expression, match.score()])
         detected_licenses.extend(
             _licenses_data_from_match(
                 match=match,
@@ -202,6 +203,7 @@ def get_licenses(
         [
             ("licenses", detected_licenses),
             ("license_expressions", detected_expressions),
+            ("license_expressions_scores", detected_expressions_with_scores),
             ("percentage_of_license_text", percentage_of_license_text),
             ("complete_text_in_array", complete_text_in_array),
         ]

From 32c3931ad1242fdbc63b0c70fce42ba54eb78d88 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 17 Aug 2022 12:59:03 +0530
Subject: [PATCH 37/59] Add eye friendly green color to match highlights #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../scantext/includes/license_summary_detail.html |  8 ++++----
 scantext/views.py                                 | 15 +++------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index d25e50542..0dec99a63 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,14 +1,14 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
         <p class="title is-5">Input License Text</p>
-<!--         {% for text in detected_licenses.complete_text_in_array %}
+        {% for text in detected_licenses.complete_text_in_array %}
         <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
             {{ text|safe }}
         </section>
-        {% endfor %} -->
-        <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
+        {% endfor %}
+<!--         <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
             {{ detected_licenses.complete_text|safe }}
-        </section>
+        </section> -->
 
     </div>
     <div class="column is-one-third">
diff --git a/scantext/views.py b/scantext/views.py
index 3f736b484..4f2172aa1 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -311,22 +311,13 @@ def get_highlighted_lines(
     tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan)
 
     header = """<style>
-    .license-match.log {
-        color: #f1f1f1;
-        background-color: #222;
-        font-family: monospace;
-    }
-
-    .license-match.wrap {
+    .license-match {
+        font-size: 18px;
         white-space: pre-wrap;
     }
 
-    .not-matched {
-        background-color: #ff0000;
-    }
-
     .matched {
-        background-color: #00ff00;
+        background-color: rgba(30, 220, 90, 0.3);
     }
     </style>
     <div class="license-match">

From 4442f7c30e1122ca5783119f47ad0e3e6b997c5e Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 19 Aug 2022 00:30:13 +0530
Subject: [PATCH 38/59] Highlight all detected licenses in one match #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        |   7 --
 .../includes/license_summary_cards.html       |   2 +-
 .../includes/license_summary_detail.html      | 119 ++++++++++--------
 .../templates/scantext/license_summary.html   |   9 +-
 scantext/views.py                             |  99 +++++++--------
 5 files changed, 117 insertions(+), 119 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index f621872b0..910dee06a 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -174,13 +174,6 @@ def _tokenize_matched_text(
                 # 2. to ensure the number of tokens is the same in both
                 # tokenizers (though, of course, the case will differ as the
                 # regular query tokenizer ignores case and punctuations).
-
-                # NOTE: we have a rare Unicode bug/issue because of some Unicode
-                # codepoint such as some Turkish characters that decompose to
-                # char + punct when casefolded. This should be fixed in Unicode
-                # release 14 and up and likely implemented in Python 3.10 and up
-                # See https://github.com/nexB/scancode-toolkit/issues/1872
-                # See also: https://bugs.python.org/issue34723#msg359514
                 qtokenized = list(index_tokenizer(token_str))
                 if not qtokenized:
 
diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index f80c3450e..bdf6e3943 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -51,7 +51,7 @@
                                     <td>{{ license.name }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Lines</strong></td>
+                                    <td><strong>Line(s)</strong></td>
                                     <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
                                 </tr>
                                 <tr>
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 0dec99a63..245a663af 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,60 +1,79 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
         <p class="title is-5">Input License Text</p>
-        {% for text in detected_licenses.complete_text_in_array %}
-        <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
-            {{ text|safe }}
-        </section>
-        {% endfor %}
-<!--         <section class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
+        <div class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
             {{ detected_licenses.complete_text|safe }}
-        </section> -->
-
+        </div>
     </div>
     <div class="column is-one-third">
-        <article class="panel is-info">
-            <div class="panel-heading py-2 px-4">
-                <p>Detected Licenses</p>
-            </div>
+        <p class="title is-5">Detected Licenses</p>
+        <div class="card">
             {% for license in detected_licenses.licenses %}
-            <p class="panel-tabs">
-                {% for rule in license.rules %}
-                <a class="">{{ rule }}</a>
-                {% endfor %}
-            </p>
-            <table class="license-table table is-striped is-hoverable mx-1">
-                <tbody>
-                    <tr>
-                        <td><strong>Key</strong></td>
-                        <td>{{ license.key }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Score</strong></td>
-                        <td>{{ license.score }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Name</strong></td>
-                        <td>{{ license.name }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Lines</strong></td>
-                        <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Owner</strong></td>
-                        <td>{{ license.owner }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Category</strong></td>
-                        <td>{{ license.category }}</td>
-                    </tr>
-                    <tr>
-                        <td><strong>Reference</strong></td>
-                        <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
-                    </tr>
-                </tbody>
-            </table>
+            <div class="licenses-card">
+                <div class="card-header is-flex is-justify-content-space-between">
+                    <div class="card-header-title" title="{{ license.short_name }}">
+                        {% if license.homepage_url %}
+                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
+                    </div>
+                    <div class="is-flex is-justify-content-row is-align-items-center">
+                        <p class="lines tag is-6 mx-1 is-light is-info">
+                            {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
+                        </p>
+                        <p class="tag is-6 mx-1 is-light 
+                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                        <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
+                            <span class="icon">
+                    <i class="fa fa-angle-down" aria-hidden="true"></i>
+                </span>
+                        </p>
+                    </div>
+                </div>
+                <div class="card-content is-hidden">
+                    <div class="content">
+                        <div class="is-flex is-justify-content-space-between">
+                            <div>
+                                <p class="title is-5">Details</p>
+                            </div>
+                            <div>
+                                <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
+                            </div>
+                        </div>
+                        <div class="table-container mt-3">
+                            <table class="table is-striped is-narrow is-hoverable is-fullwidth">
+                                <tr>
+                                    <td><strong>Key</strong></td>
+                                    <td>{{ license.key }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Score</strong></td>
+                                    <td>{{ license.score }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Name</strong></td>
+                                    <td>{{ license.name }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Line(s)</strong></td>
+                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Owner</strong></td>
+                                    <td>{{ license.owner }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Category</strong></td>
+                                    <td>{{ license.category }}</td>
+                                </tr>
+                                <tr>
+                                    <td><strong>Reference</strong></td>
+                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                </tr>
+                            </table>
+                        </div>
+                    </div>
+                </div>
+            </div>
             {% endfor %}
-        </article>
+        </div>
     </div>
 </div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index a6fc98e5d..1b05f3089 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -13,11 +13,10 @@ <h1 class="title is-4">License Detection Summary</h1>
         </div>
     </section>
 
-    <!-- still under implementation -->
     <div class="tabs">
         <ul class="nav">
-            <li><a>Summary In Ace</a></li>
-            <li class="is-active"><a>Summary with Highlighted Text</a></li>
+            <li class="is-active"><a>Summary In Ace</a></li>
+            <li><a>Summary with Highlighted Text</a></li>
             <li><a>License Details</a></li>
         </ul>
     </div>
@@ -25,11 +24,11 @@ <h1 class="title is-4">License Detection Summary</h1>
     {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
-    <section class="tab-container is-hidden">
+    <section class="tab-container">
         {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
     </section>
 
-    <section class="tab-container">
+    <section class="tab-container is-hidden">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
         {% include 'scantext/includes/license_summary_chart.html' with detected_licenses=detected_licenses %}
     </section>
diff --git a/scantext/views.py b/scantext/views.py
index 4f2172aa1..17819234b 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -22,14 +22,19 @@
 
 import sys
 import tempfile
+from pprint import pprint
 
 from django.conf import settings
 from django.contrib import messages
 from django.shortcuts import render
 
 import attr
+from licensedcode import query
 from licensedcode.match import tokenize_matched_text
+from licensedcode.spans import Span
 from licensedcode.stopwords import STOPWORDS
+from licensedcode.tokenize import index_tokenizer
+from licensedcode.tokenize import matched_query_text_tokenizer
 
 from scantext.forms import LicenseScanForm
 
@@ -116,9 +121,6 @@ def license_scanview(request):
                 "detected_licenses": expressions,
             },
         )
-    # if TRACE_HIGHLIGHTED_TEXT:
-    #     from pprint import pprint
-    #     pprint(expressions, indent=4)
 
     return render(
         request,
@@ -136,18 +138,6 @@ def get_licenses(
     deadline=sys.maxsize,
     **kwargs,
 ):
-    """
-    Return a mapping or detected_licenses for licenses detected in the file at
-    `location`
-    This mapping contains two keys:
-     - 'licenses' with a value that is list of mappings of license information.
-     - 'license_expressions' with a value that is list of license expression
-       strings.
-    `min_score` is a minimum score threshold from 0 to 100. The default is 0,
-    meaning that all license matches are returned. If specified, matches with a
-    score lower than `minimum_score` are not returned.
-    By Default ``unknown_licenses`` is set to True to detect unknown licenses.
-    """
     from licensedcode.cache import get_index
     from licensedcode.spans import Span
 
@@ -169,12 +159,14 @@ def get_licenses(
     qspans = []
     match = None
     complete_text_in_array = []
-    # run through a list of matches
+
     for match in matches:
         qspans.append(match.qspan)
 
         detected_expressions.append(match.rule.license_expression)
-        detected_expressions_with_scores.append([match.rule.license_expression, match.score()])
+        detected_expressions_with_scores.append(
+            [match.rule.license_expression, match.score()]
+        )
         detected_licenses.extend(
             _licenses_data_from_match(
                 match=match,
@@ -182,13 +174,11 @@ def get_licenses(
             )
         )
 
-        complete_text_in_array.append(
-            get_highlighted_lines(
-                match=match,
-                stopwords=STOPWORDS,
-                trace=TRACE_HIGHLIGHTED_TEXT,
-            )
-        )
+    complete_text = get_highlighted_lines(
+        matches=matches,
+        stopwords=STOPWORDS,
+        trace=TRACE_HIGHLIGHTED_TEXT,
+    )
 
     percentage_of_license_text = 0
     if match:
@@ -202,10 +192,10 @@ def get_licenses(
     return dict(
         [
             ("licenses", detected_licenses),
+            ("complete_text", complete_text),
             ("license_expressions", detected_expressions),
             ("license_expressions_scores", detected_expressions_with_scores),
             ("percentage_of_license_text", percentage_of_license_text),
-            ("complete_text_in_array", complete_text_in_array),
         ]
     )
 
@@ -292,62 +282,59 @@ def logger_debug(*args):
 
 
 def get_highlighted_lines(
-    match,
+    matches,
     stopwords=STOPWORDS,
     trace=TRACE_HIGHLIGHTED_TEXT,
 ):
-    """
-    Yield highlighted text lines (with line returns) for the whole
-    of the matched and unmatched text of a ``query``.
-    """
-    query = match.query
+    tokens = []
+
+    query = matches[0].query
     tokens = tokenize_matched_text(
         location=query.location,
         query_string=query.query_string,
         dictionary=query.idx.dictionary,
-        start_line=match.query.start_line,
+        start_line=query.start_line,
         _cache={},
     )
-    tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan)
+
+    class_position = 1
+    for match in matches:
+        tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan, class_position=class_position)
+        class_position += 1
 
     header = """<style>
-    .license-match {
-        font-size: 18px;
-        white-space: pre-wrap;
-    }
-
-    .matched {
-        background-color: rgba(30, 220, 90, 0.3);
-    }
-    </style>
-    <div class="license-match">
-    """
+    .license-match {font-size: 18px;white-space: pre-wrap;}
+    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
+    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
+    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
+    </style><div class="license-match">"""
+    body = ""
     footer = """</div>"""
+    highlight_matched = [
+        '<span class="matched1">{}</span>',
+        '<span class="matched2">{}</span>',
+        '<span class="matched3">{}</span>',
+    ]
+
 
-    body = ""
-    highlight_matched = '<span class="matched">{}</span>'
-    highlight_not_matched = '<span class="not-matched">{}</span>'
     for token in tokens:
         val = token.value
         if token.is_text and val.lower() not in stopwords:
             if token.is_matched:
-                body += highlight_matched.format(val)
+                body += highlight_matched[token.is_matched % 3].format(val)
             else:
-                body += highlight_not_matched.format(val)
+                body += val
         else:
             # we do not highlight punctuation and stopwords.
-            body += highlight_not_matched.format(val)
+            body += val
 
     return header + body + footer
 
 
-def tag_matched_tokens(tokens, match_qspan):
-    """
-    Yield Tokens from a ``tokens`` iterable of Token objects.
-    Known matched tokens are tagged as "is_matched=True" if they are matched.
-    """
+def tag_matched_tokens(tokens, match_qspan, color):
+
     for tok in tokens:
         # tagged known matched tokens (useful for highlighting)
         if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
-            tok = attr.evolve(tok, is_matched=True)
+            tok = attr.evolve(tok, is_matched=class_position)
         yield tok

From 840b3953c23ab9e55279fb456702e374542aad53 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 19 Aug 2022 00:36:19 +0530
Subject: [PATCH 39/59] Fix tests by running make valid

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scantext/views.py b/scantext/views.py
index 17819234b..4b558e852 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -299,7 +299,9 @@ def get_highlighted_lines(
 
     class_position = 1
     for match in matches:
-        tokens = tag_matched_tokens(tokens=tokens, match_qspan=match.qspan, class_position=class_position)
+        tokens = tag_matched_tokens(
+            tokens=tokens, match_qspan=match.qspan, class_position=class_position
+        )
         class_position += 1
 
     header = """<style>
@@ -316,7 +318,6 @@ def get_highlighted_lines(
         '<span class="matched3">{}</span>',
     ]
 
-
     for token in tokens:
         val = token.value
         if token.is_text and val.lower() not in stopwords:

From 60683d03bc8d1a4e1b21c559869c0d9580811140 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 19 Aug 2022 00:40:39 +0530
Subject: [PATCH 40/59] Match token args correction #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scantext/views.py b/scantext/views.py
index 4b558e852..72ee220e8 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -332,7 +332,7 @@ def get_highlighted_lines(
     return header + body + footer
 
 
-def tag_matched_tokens(tokens, match_qspan, color):
+def tag_matched_tokens(tokens, match_qspan, class_position):
 
     for tok in tokens:
         # tagged known matched tokens (useful for highlighting)

From 68205f8d478dc3de8881194d0ceaacde3c1d3d7e Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Fri, 19 Aug 2022 15:40:00 +0530
Subject: [PATCH 41/59] Add hyperlinks to urls in details #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../scantext/includes/license_summary_cards.html       |  2 +-
 .../includes/license_summary_more_details.html         | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index bdf6e3943..b1896c0a3 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -11,7 +11,7 @@
                 <div class="card-header is-flex is-justify-content-space-between">
                     <div class="card-header-title" title="{{ license.short_name }}">
                         {% if license.homepage_url %}
-                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
+                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})" target="_blank">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
                     </div>
                     <div class="is-flex is-justify-content-row is-align-items-center">
                         <p class="lines tag is-6 mx-1 is-light is-info">
diff --git a/scantext/templates/scantext/includes/license_summary_more_details.html b/scantext/templates/scantext/includes/license_summary_more_details.html
index 1ee034635..8d4821e90 100644
--- a/scantext/templates/scantext/includes/license_summary_more_details.html
+++ b/scantext/templates/scantext/includes/license_summary_more_details.html
@@ -39,21 +39,21 @@
                     <td><strong>Owner</strong></td>
                     <td>{{ license.owner }}</td>
                     <td><strong>Homepage</strong></td>
-                    <td>{{ license.homepage_url }}</td>
+                    <td><a href="{{ license.homepage_url }}" target="_blank">{{ license.homepage_url }}</a></td>
                 </tr>
                 <tr>
                     <td><strong>Text URL</strong></td>
-                    <td>{{ license.text_url }}</td>
+                    <td><a href="{{ license.text_url }}" target="_blank">{{ license.text_url }}</a></td>
                     <td><strong>Scancode Text URL</strong></td>
-                    <td>{{ license.scancode_text_url }}</td>
+                    <td><a href="{{ license.scancode_text_url }}" target="_blank">{{ license.scancode_text_url }}</a></td>
                     <td><strong>Scancode Data URL</strong></td>
-                    <td>{{ license.scancode_data_url }}</td>
+                    <td><a href="{{ license.scancode_data_url }}" target="_blank">{{ license.scancode_data_url }}</a></td>
                 </tr>
                 <tr>
                     <td><strong>SPDX License Key</strong></td>
                     <td>{{ license.spdx_license_key }}</td>
                     <td><strong>SPDX URL</strong></td>
-                    <td>{{ license.spdx_url }}</td>
+                    <td><a href="{{ license.spdx_url }}" target="_blank">{{ license.spdx_url }}</a></td>
                     <td><strong>Matched Rule</strong></td>
                     <td>{{ license.matched_rule }}</td>
                 </tr>

From 968c5387654b0bc9836cd510b3579a104861a217 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Thu, 25 Aug 2022 14:13:10 +0530
Subject: [PATCH 42/59] Update details, Improve UI #450

* The details now use the JSON styling from sctk's
add-license-detection branch

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_cards.html       |  54 ++---
 .../includes/license_summary_detail.html      |  52 ++--
 .../includes/license_summary_header.html      |   2 +-
 .../license_summary_more_details.html         |  66 ++---
 .../templates/scantext/license_summary.html   |   7 +-
 scantext/tests/data/licenses                  |   7 +-
 scantext/views.py                             | 229 +++++++++---------
 7 files changed, 188 insertions(+), 229 deletions(-)

diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_cards.html
index b1896c0a3..a43524208 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_cards.html
@@ -6,73 +6,59 @@
     <div class="column is-one-third">
         <p class="title is-5">Detected Licenses</p>
         <div class="card">
-            {% for license in detected_licenses.licenses %}
+        {% for license in detected_licenses.licenses %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
-                    <div class="card-header-title" title="{{ license.short_name }}">
-                        {% if license.homepage_url %}
-                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})" target="_blank">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
-                    </div>
+                    <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
                     <div class="is-flex is-justify-content-row is-align-items-center">
-                        <p class="lines tag is-6 mx-1 is-light is-info">
-                            {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                        </p>
-                        <p class="tag is-6 mx-1 is-light 
-                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                        <p class="lines tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
+                        <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
                         <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
                             <span class="icon">
-                    <i class="fa fa-angle-down" aria-hidden="true"></i>
-                </span>
+                                <i class="fa fa-angle-down" aria-hidden="true"></i>
+                            </span>
                         </p>
                     </div>
                 </div>
                 <div class="card-content is-hidden">
                     <div class="content">
-                        <div class="is-flex is-justify-content-space-between">
-                            <div>
-                                <p class="title is-5">Details</p>
-                            </div>
-                            <div>
-                                <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
-                            </div>
-                        </div>
+                        <p class="title is-5">Details</p>
                         <div class="table-container mt-3">
                             <table class="table is-striped is-narrow is-hoverable is-fullwidth">
                                 <tr>
-                                    <td><strong>Key</strong></td>
-                                    <td>{{ license.key }}</td>
+                                    <td><strong>License Expression</strong></td>
+                                    <td>{{ license.license_expression }}</td>
                                 </tr>
                                 <tr>
                                     <td><strong>Score</strong></td>
                                     <td>{{ license.score }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Name</strong></td>
-                                    <td>{{ license.name }}</td>
+                                    <td><strong>Matched Line(s)</strong></td>
+                                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Line(s)</strong></td>
-                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                                    <td><strong>Matched Length</strong></td>
+                                    <td>{{ license.matched_length }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Owner</strong></td>
-                                    <td>{{ license.owner }}</td>
+                                    <td><strong>Matched Coverage</strong></td>
+                                    <td>{{ license.match_coverage }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Category</strong></td>
-                                    <td>{{ license.category }}</td>
+                                    <td><strong>Matcher</strong></td>
+                                    <td>{{ license.matcher }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Reference</strong></td>
-                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                    <td><strong>Rule Identifier</strong></td>
+                                    <td>{{ license.rule_identifier }}</td>
                                 </tr>
                             </table>
                         </div>
                     </div>
                 </div>
             </div>
-            {% endfor %}
-
+        {% endfor %}
         </div>
     </div>
 </div>
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 245a663af..00c788059 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,8 +1,7 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
         <p class="title is-5">Input License Text</p>
-        <div class="match-container box" style="white-space: pre-wrap; max-height: 70vh; overflow: scroll;">
-            {{ detected_licenses.complete_text|safe }}
+        <div class="license-match" style="font-size: 18px; white-space: pre-wrap; max-height: 70vh; overflow: scroll;">{% for match in detected_licenses.complete_text %}<span class="matched{{ match.1 }}">{{ match.0 }}</span>{% endfor %}
         </div>
     </div>
     <div class="column is-one-third">
@@ -11,62 +10,49 @@
             {% for license in detected_licenses.licenses %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
-                    <div class="card-header-title" title="{{ license.short_name }}">
-                        {% if license.homepage_url %}
-                        <a href="{{ license.homepage_url }}" title="{{ license.name }} ({{ license.short_name }})">{{ license.short_name }}</a> {% else %} {{ license.short_name }} {% endif %}
-                    </div>
+                    <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
                     <div class="is-flex is-justify-content-row is-align-items-center">
-                        <p class="lines tag is-6 mx-1 is-light is-info">
-                            {% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
-                        </p>
-                        <p class="tag is-6 mx-1 is-light 
-                  {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                        <p class="lines tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
+                        <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
                         <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
                             <span class="icon">
-                    <i class="fa fa-angle-down" aria-hidden="true"></i>
-                </span>
+                                <i class="fa fa-angle-down" aria-hidden="true"></i>
+                            </span>
                         </p>
                     </div>
                 </div>
                 <div class="card-content is-hidden">
                     <div class="content">
-                        <div class="is-flex is-justify-content-space-between">
-                            <div>
-                                <p class="title is-5">Details</p>
-                            </div>
-                            <div>
-                                <p class="tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
-                            </div>
-                        </div>
+                        <p class="title is-5">Details</p>
                         <div class="table-container mt-3">
                             <table class="table is-striped is-narrow is-hoverable is-fullwidth">
                                 <tr>
-                                    <td><strong>Key</strong></td>
-                                    <td>{{ license.key }}</td>
+                                    <td><strong>License Expression</strong></td>
+                                    <td>{{ license.license_expression }}</td>
                                 </tr>
                                 <tr>
                                     <td><strong>Score</strong></td>
                                     <td>{{ license.score }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Name</strong></td>
-                                    <td>{{ license.name }}</td>
+                                    <td><strong>Matched Line(s)</strong></td>
+                                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Line(s)</strong></td>
-                                    <td>{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                                    <td><strong>Matched Length</strong></td>
+                                    <td>{{ license.matched_length }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Owner</strong></td>
-                                    <td>{{ license.owner }}</td>
+                                    <td><strong>Matched Coverage</strong></td>
+                                    <td>{{ license.match_coverage }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Category</strong></td>
-                                    <td>{{ license.category }}</td>
+                                    <td><strong>Matcher</strong></td>
+                                    <td>{{ license.matcher }}</td>
                                 </tr>
                                 <tr>
-                                    <td><strong>Reference</strong></td>
-                                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
+                                    <td><strong>Rule Identifier</strong></td>
+                                    <td>{{ license.rule_identifier }}</td>
                                 </tr>
                             </table>
                         </div>
diff --git a/scantext/templates/scantext/includes/license_summary_header.html b/scantext/templates/scantext/includes/license_summary_header.html
index 67952188a..858f77752 100644
--- a/scantext/templates/scantext/includes/license_summary_header.html
+++ b/scantext/templates/scantext/includes/license_summary_header.html
@@ -12,7 +12,7 @@
         <div>
             <p class="heading">License Expressions</p>
             <p class="title">
-              <span>{{ detected_licenses.license_expressions|length }}</span>
+              <span>{{ detected_licenses.license_expressions_and_scores|length }}</span>
             </p>
         </div>
     </div>
diff --git a/scantext/templates/scantext/includes/license_summary_more_details.html b/scantext/templates/scantext/includes/license_summary_more_details.html
index 8d4821e90..c42bca335 100644
--- a/scantext/templates/scantext/includes/license_summary_more_details.html
+++ b/scantext/templates/scantext/includes/license_summary_more_details.html
@@ -5,61 +5,43 @@
     {% for license in detected_licenses.licenses %}
     <article class="panel is-info">
         <div class="panel-heading py-2 px-4">
-            <p>{{ license.name }}</p>
+            <p>{{ license.license_expression }}</p>
         </div>
-        <table class="license-table table is-striped is-bordered is-hoverable">
+        <table class="license-table table is-striped is-bordered is-hoverable is-fullwidth">
             <tbody>
                 <tr>
-                    <td><strong>Rule</strong></td>
-                    <td>{{ license.rule }}</td>
-                    <td><strong>Key</strong></td>
-                    <td>{{ license.key }}</td>
                     <td><strong>Score</strong></td>
                     <td>{{ license.score }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Name</strong></td>
-                    <td>{{ license.name }}</td>
-                    <td><strong>Short Name</strong></td>
-                    <td>{{ license.short_name }}</td>
-                    <td><strong>Category</strong></td>
-                    <td>{{ license.category }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Line(s)</strong></td>
+                    <td><strong>Matched Line(s)</strong></td>
                     <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-                    <td><strong>Reference</strong></td>
-                    <td><a href="{{ license.reference_url }}" target="_blank">{{ license.reference_url }}</a></td>
-                    <td><strong>Execption</strong></td>
-                    <td>{{ license.is_exception }}</td>
+                    <td><strong>Rule Identifier</strong></td>
+                     <td>{{ license.rule_identifier }}</td>
                 </tr>
                 <tr>
-                    <td><strong>Is Unknown</strong></td>
-                    <td>{{ license.is_unknown }}</td>
-                    <td><strong>Owner</strong></td>
-                    <td>{{ license.owner }}</td>
-                    <td><strong>Homepage</strong></td>
-                    <td><a href="{{ license.homepage_url }}" target="_blank">{{ license.homepage_url }}</a></td>
-                </tr>
+                    <td><strong>Matcher</strong></td>
+                    <td>{{ license.matcher }}</td>
+                    <td><strong>Match Coverage</strong></td>
+                    <td>{{ license.match_coverage }}</td>
+                    <td><strong>Matched Length</strong></td>
+                    <td>{{ license.matched_length }}</td>
                 <tr>
-                    <td><strong>Text URL</strong></td>
-                    <td><a href="{{ license.text_url }}" target="_blank">{{ license.text_url }}</a></td>
-                    <td><strong>Scancode Text URL</strong></td>
-                    <td><a href="{{ license.scancode_text_url }}" target="_blank">{{ license.scancode_text_url }}</a></td>
-                    <td><strong>Scancode Data URL</strong></td>
-                    <td><a href="{{ license.scancode_data_url }}" target="_blank">{{ license.scancode_data_url }}</a></td>
+                    <td><strong>Data File</strong></td>
+                    <td><a href="https://github.com/nexB/scancode-toolkit/tree/add-license-detection/src/licensedcode/data/{% if '.RULE' in license.rule_identifier %}rules{% elif '.LICENSE' in license.rule_identifier %}licenses{% endif %}/{{ license.rule_identifier }}" target="_blank">{{ license.rule_identifier }}</a></td>
+                    <td><strong>Key(s)</strong></td>
+                    <td>
+                        {% for key in license.licenses %}
+                        <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
+                        {% endfor %}
+                    </td>
                 </tr>
                 <tr>
-                    <td><strong>SPDX License Key</strong></td>
-                    <td>{{ license.spdx_license_key }}</td>
-                    <td><strong>SPDX URL</strong></td>
-                    <td><a href="{{ license.spdx_url }}" target="_blank">{{ license.spdx_url }}</a></td>
-                    <td><strong>Matched Rule</strong></td>
-                    <td>{{ license.matched_rule }}</td>
+                    <td><strong>Rule Length</strong></td>
+                    <td>{{ license.rule_length }}</td>
+                    <td><strong>Rule Relevance</strong></td>
+                    <td>{{ license.rule_relevance }}</td>
                 </tr>
             </tbody>
         </table>
     </article>
     {% endfor %}
-
-</div>
+</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 1b05f3089..e7259e969 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -2,9 +2,13 @@
 {% load static humanize %}
 
 {% block content %}
+<style>
+    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
+    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
+    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
+</style>
 <div class="container is-widescreen">
     {% include 'scanpipe/includes/navbar_header.html' %}
-    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
     <section class="mx-5 mb-0">
         <div class="is-flex is-justify-content-space-between">
@@ -30,7 +34,6 @@ <h1 class="title is-4">License Detection Summary</h1>
 
     <section class="tab-container is-hidden">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
-        {% include 'scantext/includes/license_summary_chart.html' with detected_licenses=detected_licenses %}
     </section>
 
     <section class="tab-container is-hidden">
diff --git a/scantext/tests/data/licenses b/scantext/tests/data/licenses
index 5890f4498..ea5c84937 100644
--- a/scantext/tests/data/licenses
+++ b/scantext/tests/data/licenses
@@ -1,6 +1,5 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
+Apache-2.0
 
-[This is the first released version of the Lesser GPL
+MIT
 
+Lesser GPL
\ No newline at end of file
diff --git a/scantext/views.py b/scantext/views.py
index 72ee220e8..9aa1dd393 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -40,9 +40,6 @@
 
 TRACE_HIGHLIGHTED_TEXT = True
 SCANCODE_REPO_URL = "https://github.com/nexB/scancode-toolkit"
-SCANCODE_BASE_URL = SCANCODE_REPO_URL + "/tree/develop/src/licensedcode/data/licenses"
-SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
-SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
 SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
 DEJACODE_LICENSE_URL = "https://enterprise.dejacode.com/urn/urn:dje:license:{}"
 SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
@@ -110,18 +107,19 @@ def license_scanview(request):
                 },
             )
 
-    if not expressions["licenses"] and not expressions["license_expressions"]:
+    if not expressions:
         message = "Couldn't detect any license from the provided input."
-        messages.info(request, message)
+        messages.warning(request, message)
         return render(
             request,
-            "scantext/license_summary.html",
+            "scantext/license_scan_form.html",
             {
-                "text": input_text,
-                "detected_licenses": expressions,
+                "form": LicenseScanForm(),
             },
         )
 
+    # pprint(expressions)
+
     return render(
         request,
         "scantext/license_summary.html",
@@ -144,8 +142,7 @@ def get_licenses(
     idx = get_index()
 
     detected_licenses = []
-    detected_expressions = []
-    detected_expressions_with_scores = []
+    detected_expressions_and_scores = []
 
     # gets matches from a license file
     matches = idx.match(
@@ -156,21 +153,24 @@ def get_licenses(
         **kwargs,
     )
 
+    if not matches:
+        return False
+
     qspans = []
     match = None
-    complete_text_in_array = []
+    complete_text = None
 
     for match in matches:
         qspans.append(match.qspan)
-
-        detected_expressions.append(match.rule.license_expression)
-        detected_expressions_with_scores.append(
+        detected_expressions_and_scores.append(
             [match.rule.license_expression, match.score()]
         )
-        detected_licenses.extend(
-            _licenses_data_from_match(
+        detected_licenses.append(
+            get_mapping(
                 match=match,
+                include_text=True,
                 license_url_template=license_url_template,
+                spdx_license_url=SPDX_LICENSE_URL,
             )
         )
 
@@ -193,90 +193,12 @@ def get_licenses(
         [
             ("licenses", detected_licenses),
             ("complete_text", complete_text),
-            ("license_expressions", detected_expressions),
-            ("license_expressions_scores", detected_expressions_with_scores),
+            ("license_expressions_and_scores", detected_expressions_and_scores),
             ("percentage_of_license_text", percentage_of_license_text),
         ]
     )
 
 
-def _licenses_data_from_match(
-    match,
-    license_url_template=SCANCODE_LICENSEDB_URL,
-):
-    """
-    Return a list of "licenses" scan data built from a license match.
-    Used directly only internally for testing.
-    """
-    from licensedcode import cache
-
-    licenses = cache.get_licenses_db()
-
-    # Returned matched_text will also include the text detected
-    matched_text = match.matched_text(
-        whole_lines=False,
-        highlight=True,
-        highlight_matched="<matched>{}</matched>",
-        highlight_not_matched="<notmatched>{}</notmatched>",
-    )
-
-    detected_licenses = []
-    for license_key in match.rule.license_keys():
-        lic = licenses.get(license_key)
-        result = {}
-        detected_licenses.append(result)
-        result["key"] = lic.key
-        result["score"] = match.score()
-        result["name"] = lic.name
-        result["short_name"] = lic.short_name
-        result["category"] = lic.category
-        result["is_exception"] = lic.is_exception
-        result["is_unknown"] = lic.is_unknown
-        result["owner"] = lic.owner
-        result["homepage_url"] = lic.homepage_url
-        result["text_url"] = lic.text_urls[0] if lic.text_urls else ""
-        result["reference_url"] = license_url_template.format(lic.key)
-        result["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
-        result["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
-        result["rule"] = match.rule.license_expression
-        result["rules"] = match.rule.license_keys()
-
-        spdx_key = lic.spdx_license_key
-        result["spdx_license_key"] = spdx_key
-
-        if spdx_key:
-            is_license_ref = spdx_key.lower().startswith("licenseref-")
-            if is_license_ref:
-                spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
-            else:
-                spdx_key = lic.spdx_license_key.rstrip("+")
-                spdx_url = SPDX_LICENSE_URL.format(spdx_key)
-        else:
-            spdx_url = ""
-        result["spdx_url"] = spdx_url
-        result["start_line"] = match.start_line
-        result["end_line"] = match.end_line
-        result["matched_text"] = matched_text
-        matched_rule = result["matched_rule"] = {}
-        matched_rule["identifier"] = match.rule.identifier
-        matched_rule["license_expression"] = match.rule.license_expression
-        matched_rule["licenses"] = match.rule.license_keys()
-        matched_rule["referenced_filenames"] = match.rule.referenced_filenames
-        matched_rule["is_license_text"] = match.rule.is_license_text
-        matched_rule["is_license_notice"] = match.rule.is_license_notice
-        matched_rule["is_license_reference"] = match.rule.is_license_reference
-        matched_rule["is_license_tag"] = match.rule.is_license_tag
-        matched_rule["is_license_intro"] = match.rule.is_license_intro
-        matched_rule["has_unknown"] = match.rule.has_unknown
-        matched_rule["matcher"] = match.matcher
-        matched_rule["rule_length"] = match.rule.length
-        matched_rule["matched_length"] = match.len()
-        matched_rule["match_coverage"] = match.coverage()
-        matched_rule["rule_relevance"] = match.rule.relevance
-
-    return detected_licenses
-
-
 def logger_debug(*args):
     pass
 
@@ -304,32 +226,19 @@ def get_highlighted_lines(
         )
         class_position += 1
 
-    header = """<style>
-    .license-match {font-size: 18px;white-space: pre-wrap;}
-    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
-    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
-    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
-    </style><div class="license-match">"""
-    body = ""
-    footer = """</div>"""
-    highlight_matched = [
-        '<span class="matched1">{}</span>',
-        '<span class="matched2">{}</span>',
-        '<span class="matched3">{}</span>',
-    ]
-
+    body = []
     for token in tokens:
         val = token.value
         if token.is_text and val.lower() not in stopwords:
             if token.is_matched:
-                body += highlight_matched[token.is_matched % 3].format(val)
+                body.append([token.value, token.is_matched % 3])
             else:
-                body += val
+                body.append([token.value, -1])
         else:
             # we do not highlight punctuation and stopwords.
-            body += val
+            body.append([token.value, -1])
 
-    return header + body + footer
+    return body
 
 
 def tag_matched_tokens(tokens, match_qspan, class_position):
@@ -339,3 +248,97 @@ def tag_matched_tokens(tokens, match_qspan, class_position):
         if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
             tok = attr.evolve(tok, is_matched=class_position)
         yield tok
+
+
+def get_mapping(
+    match,
+    license_url_template,
+    spdx_license_url,
+    include_text=False,
+    license_text_diagnostics=False,
+):
+    """
+    Return a mapping of match-level, rule-level and per-license ("licenses")
+    scan data for a single license ``match``. "matched_text" is set only if
+    ``include_text``; it is highlighted if ``license_text_diagnostics``.
+    """
+    from licensedcode import cache
+
+    licenses = cache.get_licenses_db()
+
+    matched_text = None
+    if include_text:
+        highlight = bool(license_text_diagnostics)
+        matched_text = match.matched_text(whole_lines=False, highlight=highlight)
+
+    SCANCODE_BASE_URL = (
+        "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
+    )
+    SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/licenses/{}.LICENSE"
+    SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/licenses/{}.yml"
+
+    result = {}
+
+    # Detection Level Information
+    result["score"] = match.score()
+    result["start_line"] = match.start_line
+    result["end_line"] = match.end_line
+    result["matched_length"] = match.len()
+    result["match_coverage"] = match.coverage()
+    result["matcher"] = match.matcher
+
+    # LicenseDB Level Information (Rule that was matched)
+    result["license_expression"] = match.rule.license_expression
+    result["rule_identifier"] = match.rule.identifier  # .RULE OR .LICENSE
+    result["referenced_filenames"] = match.rule.referenced_filenames
+    result["is_license_text"] = match.rule.is_license_text
+    result["is_license_notice"] = match.rule.is_license_notice
+    result["is_license_reference"] = match.rule.is_license_reference
+    result["is_license_tag"] = match.rule.is_license_tag
+    result["is_license_intro"] = match.rule.is_license_intro
+    result["rule_length"] = match.rule.length
+    result["rule_relevance"] = match.rule.relevance
+    if include_text:
+        result["matched_text"] = matched_text
+
+    # License Level Information (Individual licenses that this rule refers to)
+    result["licenses"] = detected_licenses = []
+    for license_key in match.rule.license_keys():
+        detected_license = {}
+        detected_licenses.append(detected_license)
+
+        lic = licenses.get(license_key)
+
+        detected_license["key"] = lic.key
+        detected_license["name"] = lic.name
+        detected_license["short_name"] = lic.short_name
+        detected_license["category"] = lic.category
+        detected_license["is_exception"] = lic.is_exception
+        detected_license["is_unknown"] = lic.is_unknown
+        detected_license["owner"] = lic.owner
+        detected_license["homepage_url"] = lic.homepage_url
+        detected_license["text_url"] = lic.text_urls[0] if lic.text_urls else ""
+        detected_license["reference_url"] = license_url_template.format(lic.key)
+        detected_license["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(
+            lic.key
+        )
+        detected_license["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(
+            lic.key
+        )
+
+        spdx_key = lic.spdx_license_key
+        detected_license["spdx_license_key"] = spdx_key
+
+        if spdx_key:
+            is_license_ref = spdx_key.lower().startswith("licenseref-")
+            if is_license_ref:
+                spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
+            else:
+                # Drop trailing "+" for the URL; the stored spdx_license_key keeps it.
+                spdx_key = lic.spdx_license_key.rstrip("+")
+                spdx_url = spdx_license_url.format(spdx_key)
+        else:
+            spdx_url = ""
+        detected_license["spdx_url"] = spdx_url
+
+    return result

From 6cd024306099cee86942482aa7210c993f846630 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sat, 27 Aug 2022 16:50:00 +0530
Subject: [PATCH 43/59] Ace editor upgraded from v1.4.12 to v1.9.5

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/templates/scantext/license_summary.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index e7259e969..4ff64e789 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -43,7 +43,7 @@ <h1 class="title is-4">License Detection Summary</h1>
 {% endblock %}
 
 {% block scripts %}
-<script src="{% static 'ace-1.4.12.min.js' %}" crossorigin="anonymous"></script>
+<script src="{% static 'ace-1.9.5.min.js' %}" crossorigin="anonymous"></script>
 <script src="{% static 'billboard-3.0.1.pkgd.min.js' %}" crossorigin="anonymous"></script>
 <script type="text/javascript">
     let editor = ace.edit("editor", {

From 65277f13d6b79c26fe57d44eff8667d5b83b0a1e Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 31 Aug 2022 14:24:19 +0530
Subject: [PATCH 44/59] Improve details, highlights and tests

- Rearrange details and improve hyperlinking
- Improve highlighting colors
- Write tests for modules

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        | 325 +-----------------
 ...more_details.html => license_details.html} |  24 +-
 .../includes/license_summary_detail.html      |   7 +-
 ...cards.html => license_summary_editor.html} |   8 +-
 .../includes/license_summary_header.html      |   7 +-
 .../templates/scantext/license_summary.html   |  38 +-
 scantext/tests/test_views.py                  |  72 ++++
 scantext/views.py                             | 235 ++++++++-----
 8 files changed, 251 insertions(+), 465 deletions(-)
 rename scantext/templates/scantext/includes/{license_summary_more_details.html => license_details.html} (71%)
 rename scantext/templates/scantext/includes/{license_summary_cards.html => license_summary_editor.html} (96%)
 create mode 100644 scantext/tests/test_views.py

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 910dee06a..9e5db2dff 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -59,8 +59,8 @@ def _debug_print_matched_query_text(match, extras=5):
         logger_debug(qt)
 
 
-@attr.s(slots=True, frozen=True)
-class Token(object):
+@attr.s(slots=True)
+class Token:
     """
     Used to represent a token in collected query-side matched texts and SPDX
     identifiers.
@@ -99,42 +99,11 @@ class Token(object):
     # True if this is a known token
     is_known = attr.ib(default=False)
 
-    # List of LicenseMatch that match this token
-    matches = attr.ib(attr.Factory(list))
+    # List of LicenseMatch ids that match this token
+    match_ids = attr.ib(attr.Factory(list))
 
 
 def tokenize_matched_text(
-    location,
-    query_string,
-    dictionary,
-    start_line=1,
-    _cache={},
-):
-    """
-    Return a list of Token objects with pos and line number collected from the
-    file at `location` or the `query_string` string. `dictionary` is the index
-    mapping a token string to a token id.
-
-    NOTE: the _cache={} arg IS A GLOBAL mutable by design.
-    """
-    key = location, query_string, start_line
-    cached = _cache.get(key)
-    if cached:
-        return cached
-    # we only cache the last call
-    _cache.clear()
-    _cache[key] = result = list(
-        _tokenize_matched_text(
-            location=location,
-            query_string=query_string,
-            dictionary=dictionary,
-            start_line=start_line,
-        )
-    )
-    return result
-
-
-def _tokenize_matched_text(
     location,
     query_string,
     dictionary,
@@ -156,7 +125,7 @@ def _tokenize_matched_text(
     for line_num, line in qls:
         if trace:
             logger_debug(
-                "  _tokenize_matched_text:", "line_num:", line_num, "line:", line
+                "  tokenize_matched_text:", "line_num:", line_num, "line:", line
             )
 
         for is_text, token_str in matched_query_text_tokenizer(line):
@@ -227,287 +196,3 @@ def _tokenize_matched_text(
                     is_known=False,
                     pos=-1,
                 )
-
-
-def reportable_tokens(
-    tokens,
-    match_qspan,
-    start_line,
-    end_line,
-    whole_lines=False,
-    trace=TRACE_MATCHED_TEXT_DETAILS,
-):
-    """
-    Yield Tokens from a ``tokens`` iterable of Token objects (built from a query-
-    side scanned file or string) that are inside a ``match_qspan`` matched Span
-    starting at `start_line` and ending at ``end_line``. If whole_lines is True,
-    also yield unmatched Tokens that are before and after the match and on the
-    first and last line of a match (unless the lines are very long text lines or
-    the match is from binary content.)
-
-    As a side effect, known matched tokens are tagged as "is_matched=True" if
-    they are matched.
-
-    If ``whole_lines`` is True, any token within matched lines range is
-    included. Otherwise, a token is included if its position is within the
-    matched ``match_qspan`` or it is a punctuation token immediately after the
-    matched ``match_qspan`` even though not matched.
-    """
-    start = match_qspan.start
-    end = match_qspan.end
-
-    started = False
-    finished = False
-
-    end_pos = 0
-    last_pos = 0
-    for real_pos, tok in enumerate(tokens):
-        if trace:
-            logger_debug("reportable_tokens: processing", real_pos, tok)
-
-        # ignore tokens outside the matched lines range
-        if tok.line_num < start_line:
-            if trace:
-                logger_debug(
-                    "  tok.line_num < start_line:", tok.line_num, "<", start_line
-                )
-
-            continue
-
-        if tok.line_num > end_line:
-            if trace:
-                logger_debug("  tok.line_num > end_line", tok.line_num, ">", end_line)
-
-            break
-
-        if trace:
-            logger_debug("reportable_tokens:", real_pos, tok)
-
-        is_included = False
-
-        # tagged known matched tokens (useful for highlighting)
-        if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
-            tok = attr.evolve(tok, is_matched=True)
-            is_included = True
-            if trace:
-                logger_debug("  tok.is_matched = True", "match_qspan:", match_qspan)
-        else:
-            if trace:
-                logger_debug(
-                    "  unmatched token: tok.is_matched = False",
-                    "match_qspan:",
-                    match_qspan,
-                    "tok.pos in match_qspan:",
-                    tok.pos in match_qspan,
-                )
-
-        if whole_lines:
-            # we only work on matched lines so no need to test further
-            # if start_line <= tok.line_num <= end_line.
-            if trace:
-                logger_debug("  whole_lines")
-
-            is_included = True
-
-        else:
-            # Are we in the match_qspan range or a punctuation right before or after
-            # that range?
-
-            # start
-            if not started and tok.pos == start:
-                started = True
-                if trace:
-                    logger_debug("  start")
-
-                is_included = True
-
-            # middle
-            if started and not finished:
-                if trace:
-                    logger_debug("    middle")
-
-                is_included = True
-
-            if tok.pos == end:
-                if trace:
-                    logger_debug("  at end")
-
-                finished = True
-                started = False
-                end_pos = real_pos
-
-            # one punctuation token after a match
-            if finished and not started and end_pos and last_pos == end_pos:
-                end_pos = 0
-                if not tok.is_text:
-                    # strip the trailing spaces of the last token
-                    if tok.value.strip():
-                        if trace:
-                            logger_debug("  end yield")
-
-                        is_included = True
-
-        last_pos = real_pos
-        if is_included:
-            yield tok
-
-
-def get_full_matched_text(
-    match,
-    location=None,
-    query_string=None,
-    idx=None,
-    whole_lines=False,
-    highlight=True,
-    highlight_matched="{}",
-    highlight_not_matched="[{}]",
-    only_matched=False,
-    stopwords=STOPWORDS,
-    _usecache=True,
-    trace=TRACE_MATCHED_TEXT,
-):
-    """
-    Yield strings corresponding to the full matched query text given a ``match``
-    LicenseMatch detected with an `idx` LicenseIndex in a query file at
-    ``location`` or a ``query_string``.
-
-    See get_full_qspan_matched_text() for other arguments documentation
-    """
-    if trace:
-        logger_debug("get_full_matched_text:  match:", match)
-
-    return get_full_qspan_matched_text(
-        match_qspan=match.qspan,
-        match_query_start_line=match.query.start_line,
-        match_start_line=match.start_line,
-        match_end_line=match.end_line,
-        location=location,
-        query_string=query_string,
-        idx=idx,
-        whole_lines=whole_lines,
-        highlight=highlight,
-        highlight_matched=highlight_matched,
-        highlight_not_matched=highlight_not_matched,
-        only_matched=only_matched,
-        stopwords=stopwords,
-        _usecache=_usecache,
-        trace=trace,
-    )
-
-
-def get_full_qspan_matched_text(
-    match_qspan,
-    match_query_start_line,
-    match_start_line,
-    match_end_line,
-    location=None,
-    query_string=None,
-    idx=None,
-    whole_lines=False,
-    highlight=True,
-    highlight_matched="{}",
-    highlight_not_matched="[{}]",
-    only_matched=False,
-    stopwords=STOPWORDS,
-    _usecache=True,
-    trace=TRACE_MATCHED_TEXT,
-):
-    """
-    Yield strings corresponding to words of the matched query text given a
-    ``match_qspan`` LicenseMatch qspan Span detected with an `idx` LicenseIndex
-    in a query file at ``location`` or a ``query_string``.
-
-    - ``match_query_start_line`` is the match query.start_line
-    - ``match_start_line`` is the match start_line
-    - ``match_end_line`` is the match= end_line
-
-    The returned strings contains the full text including punctuations and
-    spaces that are not participating in the match proper including punctuations.
-
-    If ``whole_lines`` is True, the unmatched part at the start of the first
-    matched line and the unmatched part at the end of the last matched lines are
-    also included in the returned text (unless the line is very long).
-
-    If ``highlight`` is True, each token is formatted for "highlighting" and
-    emphasis with the ``highlight_matched`` format string for matched tokens or to
-    the ``highlight_not_matched`` for tokens not matched. The default is to
-    enclose an unmatched token sequence in [] square brackets. Punctuation is
-    not highlighted.
-
-    if ``only_matched`` is True, only matched tokens are returned and
-    ``whole_lines`` and ``highlight`` are ignored. Unmatched words are replaced
-    by a "dot".
-
-    If ``_usecache`` is True, the tokenized text is cached for efficiency.
-    """
-    if trace:
-        logger_debug("get_full_qspan_matched_text:  match_qspan:", match_qspan)
-        logger_debug("get_full_qspan_matched_text:  location:", location)
-        logger_debug("get_full_qspan_matched_text:  query_string :", query_string)
-
-    assert location or query_string
-    assert idx
-
-    if only_matched:
-        # use highlighting to skip the reporting of unmatched entirely
-        whole_lines = False
-        highlight = True
-        highlight_matched = "{}"
-        highlight_not_matched = "."
-        highlight = True
-
-    # Create and process a stream of Tokens
-    if not _usecache:
-        # for testing only, reset cache on each call
-        tokens = tokenize_matched_text(
-            location=location,
-            query_string=query_string,
-            dictionary=idx.dictionary,
-            start_line=match_query_start_line,
-            _cache={},
-        )
-    else:
-        tokens = tokenize_matched_text(
-            location=location,
-            query_string=query_string,
-            dictionary=idx.dictionary,
-            start_line=match_query_start_line,
-        )
-
-    if trace:
-        tokens = list(tokens)
-        print()
-        logger_debug("get_full_qspan_matched_text:  tokens:")
-        for t in tokens:
-            print("    ", t)
-        print()
-
-    tokens = reportable_tokens(
-        tokens=tokens,
-        match_qspan=match_qspan,
-        start_line=match_start_line,
-        end_line=match_end_line,
-        whole_lines=whole_lines,
-    )
-
-    if trace:
-        tokens = list(tokens)
-        logger_debug("get_full_qspan_matched_text:  reportable_tokens:")
-        for t in tokens:
-            print(t)
-        print()
-
-    # Finally yield strings with eventual highlightings
-    for token in tokens:
-        val = token.value
-        if not highlight:
-            yield val
-        else:
-            if token.is_text and val.lower() not in stopwords:
-                if token.is_matched:
-                    yield highlight_matched.format(val)
-                else:
-                    yield highlight_not_matched.format(val)
-            else:
-                # we do not highlight punctuation and stopwords.
-                yield val
diff --git a/scantext/templates/scantext/includes/license_summary_more_details.html b/scantext/templates/scantext/includes/license_details.html
similarity index 71%
rename from scantext/templates/scantext/includes/license_summary_more_details.html
rename to scantext/templates/scantext/includes/license_details.html
index c42bca335..8743f6b6f 100644
--- a/scantext/templates/scantext/includes/license_summary_more_details.html
+++ b/scantext/templates/scantext/includes/license_details.html
@@ -2,7 +2,7 @@
     <div class="column is-two-third">
         <p class="title is-5">Detected Licenses</p>
     </div>
-    {% for license in detected_licenses.licenses %}
+    {% for license in detected_licenses.license_matches %}
     <article class="panel is-info">
         <div class="panel-heading py-2 px-4">
             <p>{{ license.license_expression }}</p>
@@ -14,8 +14,14 @@
                     <td>{{ license.score }}</td>
                     <td><strong>Matched Line(s)</strong></td>
                     <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-                    <td><strong>Rule Identifer</strong></td>
-                     <td>{{ license.rule_identifier }}</td>
+                    <td><strong>Rule Identifier</strong></td>
+                    <td>
+                    {% if license.rule_text_url %}
+                        <a href="{{ license.rule_text_url }}" target="_blank">{{ license.rule_identifier }}</a>
+                    {% else %}
+                        {{ license.rule_identifier }}
+                    {% endif %}
+                    </td>
                 </tr>
                 <tr>
                     <td><strong>Matcher</strong></td>
@@ -25,20 +31,16 @@
                     <td><strong>Matched Length</strong></td>
                     <td>{{ license.matched_length }}</td>
                 <tr>
-                    <td><strong>Data File</strong></td>
-                    <td><a href="https://github.com/nexB/scancode-toolkit/tree/add-license-detection/src/licensedcode/data/{% if '.RULE' in license.rule_identifier %}rules{% elif '.LICENSE' in license.rule_identifier %}licenses{% endif %}/{{ license.rule_identifier }}" target="_blank">{{ license.rule_identifier }}</a></td>
                     <td><strong>Key(s)</strong></td>
                     <td>
-                        {% for key in license.licenses %}
+                    {% for key in license.licenses %}
                         <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
-                        {% endfor %}
+                    {% endfor %}
                     </td>
-                </tr>
-                <tr>
-                    <td><strong>Rule Length</strong></td>
-                    <td>{{ license.rule_length }}</td>
                     <td><strong>Rule Relevance</strong></td>
                     <td>{{ license.rule_relevance }}</td>
+                    <td><strong>Rule Length</strong></td>
+                    <td>{{ license.rule_length }}</td>
                 </tr>
             </tbody>
         </table>
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 00c788059..2fa5de2ed 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,13 +1,12 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
-        <p class="title is-5">Input License Text</p>
-        <div class="license-match" style="font-size: 18px; white-space: pre-wrap; max-height: 70vh; overflow: scroll;">{% for match in detected_licenses.complete_text %}<span class="matched{{ match.1 }}">{{ match.0 }}</span>{% endfor %}
-        </div>
+        <p class="title is-5">Input Text</p>
+        <div class="license-match" style="font-size: 18px; white-space: pre-wrap; max-height: 70vh; overflow: scroll;">{% for token in detected_licenses.license_tokens %}<span class="matched{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
     </div>
     <div class="column is-one-third">
         <p class="title is-5">Detected Licenses</p>
         <div class="card">
-            {% for license in detected_licenses.licenses %}
+            {% for license in detected_licenses.license_matches %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
                     <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
diff --git a/scantext/templates/scantext/includes/license_summary_cards.html b/scantext/templates/scantext/includes/license_summary_editor.html
similarity index 96%
rename from scantext/templates/scantext/includes/license_summary_cards.html
rename to scantext/templates/scantext/includes/license_summary_editor.html
index a43524208..e8c06a1b9 100644
--- a/scantext/templates/scantext/includes/license_summary_cards.html
+++ b/scantext/templates/scantext/includes/license_summary_editor.html
@@ -1,12 +1,12 @@
 <div class="columns mb-5 mx-1">
     <div class="column is-two-third">
-        <p class="title is-5">Input License Text</p>
+        <p class="title is-5">Input Text</p>
         <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
     </div>
     <div class="column is-one-third">
         <p class="title is-5">Detected Licenses</p>
         <div class="card">
-        {% for license in detected_licenses.licenses %}
+            {% for license in detected_licenses.license_matches %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
                     <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
@@ -58,7 +58,7 @@
                     </div>
                 </div>
             </div>
-        {% endfor %}
+            {% endfor %}
         </div>
     </div>
-</div>
+</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_header.html b/scantext/templates/scantext/includes/license_summary_header.html
index 858f77752..916c5d013 100644
--- a/scantext/templates/scantext/includes/license_summary_header.html
+++ b/scantext/templates/scantext/includes/license_summary_header.html
@@ -12,7 +12,10 @@
         <div>
             <p class="heading">License Expressions</p>
             <p class="title">
-              <span>{{ detected_licenses.license_expressions_and_scores|length }}</span>
+                <!-- TODO: clean up this from views -->
+                <!-- licenses are every license inside a match ex: MIT, Apache-2.0 are 2 licenses -->
+                <!-- license_expressions are all the rules ex: "MIT OR Apache-2.0" is one expression -->
+              <span>{{ detected_licenses.license_matches|length }}</span>
             </p>
         </div>
     </div>
@@ -20,7 +23,7 @@
         <div>
             <p class="heading">Licenses</p>
             <p class="title">
-              <span>{{ detected_licenses.licenses|length }}</span>
+              <span>{{ detected_licenses.license_keys_count }}</span>
             </p>
         </div>
     </div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 4ff64e789..f538b1e96 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -1,12 +1,13 @@
 {% extends 'scanpipe/base.html' %}
 {% load static humanize %}
 
-{% block content %}
+{% block extrahead %}
 <style>
-    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
-    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
-    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
+    {% for mcolor in detected_licenses.match_colors %}{{ mcolor }}{% endfor %}
 </style>
+{% endblock %}
+
+{% block content %}
 <div class="container is-widescreen">
     {% include 'scanpipe/includes/navbar_header.html' %}
 
@@ -16,7 +17,6 @@ <h1 class="title is-4">License Detection Summary</h1>
             <a href="{% url 'license_scan' %}" class="button is-link">New Scan</a>
         </div>
     </section>
-
     <div class="tabs">
         <ul class="nav">
             <li class="is-active"><a>Summary In Ace</a></li>
@@ -29,7 +29,7 @@ <h1 class="title is-4">License Detection Summary</h1>
     <hr class="mx-1">
 
     <section class="tab-container">
-        {% include 'scantext/includes/license_summary_cards.html' with detected_licenses=detected_licenses %}
+        {% include 'scantext/includes/license_summary_editor.html' with detected_licenses=detected_licenses %}        
     </section>
 
     <section class="tab-container is-hidden">
@@ -37,7 +37,7 @@ <h1 class="title is-4">License Detection Summary</h1>
     </section>
 
     <section class="tab-container is-hidden">
-        {% include 'scantext/includes/license_summary_more_details.html' with detected_licenses=detected_licenses %}
+        {% include 'scantext/includes/license_details.html' with detected_licenses=detected_licenses %}
     </section>
 </div>
 {% endblock %}
@@ -137,28 +137,4 @@ <h1 class="title is-4">License Detection Summary</h1>
         }
     }
 </script>
-{{ detected_licenses|json_script:"detected_licenses" }}
-<script type="text/javascript">
-    var chart = bb.generate({
-      data: {
-        columns: {{ detected_licenses.license_expressions_scores|safe }},
-        type: "donut",
-      },
-      bindto: "#pieChart"
-    });
-
-    var chart = bb.generate({
-      data: {
-        columns: {{ detected_licenses.license_expressions_scores|safe }},
-        type: "bar", // for ESM specify as: bar()
-      },
-      bar: {
-        width: {
-          ratio: 0.5
-        }
-      },
-      bindto: "#barChart_1"
-    });
-
-</script>
 {% endblock %}
diff --git a/scantext/tests/test_views.py b/scantext/tests/test_views.py
new file mode 100644
index 000000000..8b4fe938c
--- /dev/null
+++ b/scantext/tests/test_views.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/scancode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import os
+
+from commoncode.testcase import FileBasedTesting
+from licensedcode import match
+from licensedcode import models
+from licensedcode.spans import Span
+
+from scantext.views import get_license_keys_count
+from scantext.views import get_rule_text_url
+
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+
+
+class TestScantextViews(FileBasedTesting):  # tests for scantext.views helpers
+    test_data_dir = TEST_DATA_DIR
+
+    def test_get_license_keys_count(self):
+        rule1 = models.Rule(license_expression="Apache-2.0", stored_text="1")
+        rule2 = models.Rule(license_expression="Apache-2.0 OR MIT", stored_text="2")
+        rule3 = models.Rule(license_expression="BSD AND GPL", stored_text="3")
+
+        match1 = match.LicenseMatch(rule=rule1, ispan=Span(), qspan=Span())
+        match2 = match.LicenseMatch(rule=rule2, ispan=Span(), qspan=Span())
+        match3 = match.LicenseMatch(rule=rule3, ispan=Span(), qspan=Span())
+
+        matches = [match1, match2, match3]  # 5 keys overall, Apache-2.0 twice
+        assert get_license_keys_count(matches) == 4  # presumably distinct keys
+
+    def test_get_rule_text_url__for_rule(self):  # expects "/rules/" URL
+        rule1 = models.Rule(license_expression="Apache-2.0", stored_text="1")
+        rule1.identifier = "Apache-2.0.RULE"
+        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
+
+        assert result == "http://example.com/rules/Apache-2.0.RULE", result
+
+    def test_get_rule_text_url__for_license(self):  # expects "/licenses/" URL
+        rule1 = models.Rule(
+            license_expression="Apache-2.0", stored_text="1", is_from_license=True
+        )
+        rule1.identifier = "Apache-2.0.LICENSE"
+        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
+
+        assert result == "http://example.com/licenses/Apache-2.0.LICENSE", result
+
+    def test_get_rule_text_url__for_spdx(self):  # SpdxRule: falsy URL expected
+        rule1 = models.SpdxRule(license_expression="Apache-2.0", stored_text="1")
+        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
+
+        assert not result
+
+    def test_get_rule_text_url__for_unknown(self):  # UnknownRule: falsy URL expected
+        rule1 = models.UnknownRule(license_expression="Apache-2.0", stored_text="1")
+        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
+
+        assert not result
+
+    def test_get_rule_text_url__with_default_base_url(self):  # no base_url passed
+        rule1 = models.Rule(license_expression="apache-2.0 or mit", stored_text="1")
+        rule1.identifier = "apache-2.0_or_mit_48.RULE"
+        result = get_rule_text_url(rule=rule1)
+        expected = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_or_mit_48.RULE"
+
+        assert result == expected
diff --git a/scantext/views.py b/scantext/views.py
index 9aa1dd393..265ac9167 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -29,16 +29,21 @@
 from django.shortcuts import render
 
 import attr
+from licensedcode import models
 from licensedcode import query
-from licensedcode.match import tokenize_matched_text
 from licensedcode.spans import Span
 from licensedcode.stopwords import STOPWORDS
 from licensedcode.tokenize import index_tokenizer
 from licensedcode.tokenize import matched_query_text_tokenizer
 
 from scantext.forms import LicenseScanForm
+from scantext.match_text import tokenize_matched_text
 
 TRACE_HIGHLIGHTED_TEXT = True
+
+SCANCODE_BASE_URL = (
+    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
+)
 SCANCODE_REPO_URL = "https://github.com/nexB/scancode-toolkit"
 SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
 DEJACODE_LICENSE_URL = "https://enterprise.dejacode.com/urn/urn:dje:license:{}"
@@ -84,6 +89,9 @@ def license_scanview(request):
 
     # The flush in tempfile is required to ensure that the content is
     # written to the disk before it's read by get_licenses function
+    # TODO: check this to handle input files
+    # https://github.com/nexB/commoncode/blob/9131627677d3ef171ddc472991a5c4d4a3431ee3/src/commoncode/fileutils.py#L99
+
     if input_text:
         with tempfile.NamedTemporaryFile(mode="w") as temp_file:
             temp_file.write(input_text)
@@ -91,6 +99,7 @@ def license_scanview(request):
             expressions = get_licenses(location=temp_file.name)
     elif input_file:
         try:
+            # rework on how to handle temporary files.
             with tempfile.NamedTemporaryFile(mode="w") as temp_file:
                 input_text = str(input_file.read(), "UTF-8")
                 temp_file.write(input_text)
@@ -108,7 +117,7 @@ def license_scanview(request):
             )
 
     if not expressions:
-        message = "Couldn't detect any license from the provided input."
+        message = "Could not detect any license."
         messages.warning(request, message)
         return render(
             request,
@@ -118,7 +127,8 @@ def license_scanview(request):
             },
         )
 
-    # pprint(expressions)
+    # import json
+    # print(json.dumps(expressions, indent=2))
 
     return render(
         request,
@@ -133,152 +143,164 @@ def license_scanview(request):
 def get_licenses(
     location,
     license_url_template=SCANCODE_LICENSEDB_URL,
-    deadline=sys.maxsize,
     **kwargs,
 ):
+    """
+    Return a mapping of license match data from detecting license
+    in the file at ``location`` suitable for use in template.
+
+    The mapping can be empty if there are no matches.
+    """
     from licensedcode.cache import get_index
     from licensedcode.spans import Span
 
     idx = get_index()
 
-    detected_licenses = []
-    detected_expressions_and_scores = []
-
     # gets matches from a license file
     matches = idx.match(
         location=location,
-        min_score=0,
-        deadline=deadline,
         unknown_licenses=True,
         **kwargs,
     )
 
     if not matches:
-        return False
+        return {}
 
-    qspans = []
-    match = None
-    complete_text = None
+    query = matches[0].query
 
-    for match in matches:
-        qspans.append(match.qspan)
-        detected_expressions_and_scores.append(
-            [match.rule.license_expression, match.score()]
-        )
-        detected_licenses.append(
-            get_mapping(
+    # Assign a numeric id to every match.
+    matches_by_id = dict(enumerate(matches))
+
+    del matches
+
+    license_matches = []
+
+    for mid, match in matches_by_id.items():
+        license_matches.append(
+            get_match_details(
+                mid=mid,
                 match=match,
-                include_text=True,
                 license_url_template=license_url_template,
                 spdx_license_url=SPDX_LICENSE_URL,
             )
         )
 
-    complete_text = get_highlighted_lines(
-        matches=matches,
+    license_tokens = get_license_tokens(
+        query=query,
+        matches_by_id=matches_by_id,
         stopwords=STOPWORDS,
         trace=TRACE_HIGHLIGHTED_TEXT,
     )
 
-    percentage_of_license_text = 0
-    if match:
-        # we need at least one match to compute a license_coverage
-        matched_tokens_length = len(Span().union(*qspans))
-        query_tokens_length = match.query.tokens_length(with_unknown=True)
-        percentage_of_license_text = round(
-            (matched_tokens_length / query_tokens_length) * 100, 2
-        )
+    match_colors = build_colors(matches_by_id=matches_by_id)
+    # print(match_colors)
 
-    return dict(
-        [
-            ("licenses", detected_licenses),
-            ("complete_text", complete_text),
-            ("license_expressions_and_scores", detected_expressions_and_scores),
-            ("percentage_of_license_text", percentage_of_license_text),
-        ]
-    )
+    return {
+        "license_matches": license_matches,
+        "license_tokens": license_tokens,
+        "match_colors": match_colors,
+        "license_keys_count": get_license_keys_count(matches=matches_by_id.values()),
+        "percentage_of_license_text": get_percentage_of_license_text(
+            query=query, matches=matches_by_id.values()
+        ),
+    }
+
+
+def get_percentage_of_license_text(query, matches):
+    """
+    Return the percentage of license text matched in the ``query`` Query by a list
+    of ``matches``. The percentage is a float between 0 and 100.
+    """
+
+    # TODO: percentage of license text should be done by scancode-toolkit.
+    if not matches:
+        return 0
 
+    qspans = (match.qspan for match in matches)
 
-def logger_debug(*args):
-    pass
+    matched_tokens_length = len(Span().union(*qspans))
+    query_tokens_length = query.tokens_length(with_unknown=True)
+    return round((matched_tokens_length / query_tokens_length) * 100, 2)
 
 
-def get_highlighted_lines(
-    matches,
+def get_license_tokens(
+    query,
+    matches_by_id,
     stopwords=STOPWORDS,
     trace=TRACE_HIGHLIGHTED_TEXT,
 ):
-    tokens = []
-
-    query = matches[0].query
-    tokens = tokenize_matched_text(
-        location=query.location,
-        query_string=query.query_string,
-        dictionary=query.idx.dictionary,
-        start_line=query.start_line,
-        _cache={},
-    )
-
-    class_position = 1
-    for match in matches:
-        tokens = tag_matched_tokens(
-            tokens=tokens, match_qspan=match.qspan, class_position=class_position
+    """
+    Return a list of ``query`` tokens tagged with the match ids of ``matches_by_id``.
+    """
+    # Token(value="", pos=3, is_text=True, is_matched=True, match_ids=[mid, mid, mid])
+    tokens = list(
+        tokenize_matched_text(
+            location=query.location,
+            query_string=query.query_string,
+            dictionary=query.idx.dictionary,
+            start_line=query.start_line,
         )
-        class_position += 1
-
-    body = []
-    for token in tokens:
-        val = token.value
-        if token.is_text and val.lower() not in stopwords:
-            if token.is_matched:
-                body.append([token.value, token.is_matched % 3])
-            else:
-                body.append([token.value, -1])
-        else:
-            # we do not highlight punctuation and stopwords.
-            body.append([token.value, -1])
+    )
 
-    return body
+    for mid, match in matches_by_id.items():
+        tag_matched_tokens(tokens=tokens, match_qspan=match.qspan, mid=mid)
 
+    return tokens
 
-def tag_matched_tokens(tokens, match_qspan, class_position):
 
+def tag_matched_tokens(tokens, match_qspan, mid):
+    """
+    Tag each matched token in an iterable of ``tokens`` with the ``mid`` match id.
+    A token is matched when it is in the ``match_qspan``.
+    """
+    previous_is_matched = False
     for tok in tokens:
-        # tagged known matched tokens (useful for highlighting)
-        if tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
-            tok = attr.evolve(tok, is_matched=class_position)
-        yield tok
+        if previous_is_matched and not tok.is_text:
+            tok.match_ids.append(mid)
+            tok = attr.evolve(tok, is_matched=True)
+            previous_is_matched = False
+        elif tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
+            tok.match_ids.append(mid)
+            tok = attr.evolve(tok, is_matched=True)
+            previous_is_matched = True
 
 
-def get_mapping(
+def build_colors(matches_by_id):
+    """
+    Return a list of CSS rules, one per match id (mid), such as:
+
+    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
+    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
+    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
+    """
+    return [
+        f".matched{mid} {{background-color: rgba({(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.3);}}"
+        for mid in matches_by_id
+    ]
+
+
+def get_match_details(
+    mid,
     match,
     license_url_template,
     spdx_license_url,
-    include_text=False,
-    license_text_diagnostics=False,
 ):
     """
-    Return a list of "matches" scan data built from a license match.
+    Return a mapping of license data built from a LicenseMatch ``match``.
     """
     from licensedcode import cache
 
     licenses = cache.get_licenses_db()
 
-    matched_text = None
-    if include_text:
-        if license_text_diagnostics:
-            matched_text = match.matched_text(whole_lines=False, highlight=True)
-        else:
-            matched_text = match.matched_text(whole_lines=False, highlight=False)
+    # TODO: decide whether the text should be highlighted or not.
+    matched_text = match.matched_text(whole_lines=False, highlight=False)
 
-    SCANCODE_BASE_URL = (
-        "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/"
-    )
     SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
     SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
 
     result = {}
 
+    result["mid"] = mid
     # Detection Level Information
     result["score"] = match.score()
     result["start_line"] = match.start_line
@@ -289,7 +311,8 @@ def get_mapping(
 
     # LicenseDB Level Information (Rule that was matched)
     result["license_expression"] = match.rule.license_expression
-    result["rule_identifier"] = match.rule.identifier  # .RULE OR .LICENSE
+    result["rule_text_url"] = get_rule_text_url(match.rule)
+    result["rule_identifier"] = match.rule.identifier
     result["referenced_filenames"] = match.rule.referenced_filenames
     result["is_license_text"] = match.rule.is_license_text
     result["is_license_notice"] = match.rule.is_license_notice
@@ -298,8 +321,7 @@ def get_mapping(
     result["is_license_intro"] = match.rule.is_license_intro
     result["rule_length"] = match.rule.length
     result["rule_relevance"] = match.rule.relevance
-    if include_text:
-        result["matched_text"] = matched_text
+    result["matched_text"] = matched_text
 
     # License Level Information (Individual licenses that this rule refers to)
     result["licenses"] = detected_licenses = []
@@ -342,3 +364,30 @@ def get_mapping(
         detected_license["spdx_url"] = spdx_url
 
     return result
+
+
+def get_license_keys_count(matches):
+    """
+    Return the number of unique license keys found in a list of license matches.
+    """
+    keys = set()
+    for match in matches:
+        keys.update(match.rule.license_keys())
+
+    return len(keys)
+
+
+def get_rule_text_url(rule, base_url=SCANCODE_BASE_URL):
+    """
+    Return a URL to the text file of a ``rule`` Rule.
+    Return None if there is no URL for the ``rule``.
+    """
+
+    if isinstance(rule, (models.SpdxRule, models.UnknownRule)):
+        return
+
+    if rule.is_from_license:
+        return f"{base_url}/licenses/{rule.identifier}"
+
+    else:
+        return f"{base_url}/rules/{rule.identifier}"

From bdae386e83ea8fd5d1a3fda48a29e7867a0f2833 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sat, 3 Sep 2022 00:41:14 +0530
Subject: [PATCH 45/59] Add report functionality to the summary

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/templates/scantext/includes/license_report.html   | 3 +++
 .../scantext/includes/license_summary_detail.html          | 7 +++++--
 .../scantext/includes/license_summary_editor.html          | 7 +++++--
 3 files changed, 13 insertions(+), 4 deletions(-)
 create mode 100644 scantext/templates/scantext/includes/license_report.html

diff --git a/scantext/templates/scantext/includes/license_report.html b/scantext/templates/scantext/includes/license_report.html
new file mode 100644
index 000000000..4d7fa8708
--- /dev/null
+++ b/scantext/templates/scantext/includes/license_report.html
@@ -0,0 +1,3 @@
+<a href="https://github.com/nexB/scancode.io/issues/new?labels=bug&title=SCIO+License+Detection+Error&body=Scancode.io+detected+incorrect+or+false+license%0A```python%0A{{ license }}```%0A" target="_blank">
+    <span class="tag is-danger is-light">Report <i class="fa fa-flag ml-2"></i></span>
+</a>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 2fa5de2ed..b54ee6025 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -5,7 +5,7 @@
     </div>
     <div class="column is-one-third">
         <p class="title is-5">Detected Licenses</p>
-        <div class="card">
+        <div class="card is-shadowless">
             {% for license in detected_licenses.license_matches %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
@@ -22,7 +22,10 @@
                 </div>
                 <div class="card-content is-hidden">
                     <div class="content">
-                        <p class="title is-5">Details</p>
+                        <div class="is-flex is-justify-content-space-between">
+                            <p class="title is-5">Details</p>
+                            {% include 'scantext/includes/license_report.html' with license=license %}
+                        </div>
                         <div class="table-container mt-3">
                             <table class="table is-striped is-narrow is-hoverable is-fullwidth">
                                 <tr>
diff --git a/scantext/templates/scantext/includes/license_summary_editor.html b/scantext/templates/scantext/includes/license_summary_editor.html
index e8c06a1b9..c7752c6fa 100644
--- a/scantext/templates/scantext/includes/license_summary_editor.html
+++ b/scantext/templates/scantext/includes/license_summary_editor.html
@@ -5,7 +5,7 @@
     </div>
     <div class="column is-one-third">
         <p class="title is-5">Detected Licenses</p>
-        <div class="card">
+        <div class="card is-shadowless">
             {% for license in detected_licenses.license_matches %}
             <div class="licenses-card">
                 <div class="card-header is-flex is-justify-content-space-between">
@@ -22,7 +22,10 @@
                 </div>
                 <div class="card-content is-hidden">
                     <div class="content">
-                        <p class="title is-5">Details</p>
+                        <div class="is-flex is-justify-content-space-between">
+                            <p class="title is-5">Details</p>
+                            {% include 'scantext/includes/license_report.html' with license=license %}
+                        </div>
                         <div class="table-container mt-3">
                             <table class="table is-striped is-narrow is-hoverable is-fullwidth">
                                 <tr>

From 81d1c6deae1c3aaf31cf862cc6037f34d86e0a8a Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 7 Sep 2022 02:31:44 +0530
Subject: [PATCH 46/59] Add new user interface and cleanup the old one

* Ace isn't used any more
* Summary is presented in single page only

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 ...details.html => license_detail_modal.html} |  41 ++++--
 .../scantext/includes/license_report.html     |   5 +-
 .../includes/license_summary_chart.html       |  10 --
 .../includes/license_summary_detail.html      | 108 +++++++-------
 .../includes/license_summary_editor.html      |  67 ---------
 .../includes/license_summary_header.html      |   5 +-
 .../templates/scantext/license_scan_form.html |   2 +-
 .../templates/scantext/license_summary.html   | 134 ++++--------------
 8 files changed, 112 insertions(+), 260 deletions(-)
 rename scantext/templates/scantext/includes/{license_details.html => license_detail_modal.html} (64%)
 delete mode 100644 scantext/templates/scantext/includes/license_summary_chart.html
 delete mode 100644 scantext/templates/scantext/includes/license_summary_editor.html

diff --git a/scantext/templates/scantext/includes/license_details.html b/scantext/templates/scantext/includes/license_detail_modal.html
similarity index 64%
rename from scantext/templates/scantext/includes/license_details.html
rename to scantext/templates/scantext/includes/license_detail_modal.html
index 8743f6b6f..5a269fcd4 100644
--- a/scantext/templates/scantext/includes/license_details.html
+++ b/scantext/templates/scantext/includes/license_detail_modal.html
@@ -1,19 +1,22 @@
-<div class="mb-5 mx-1">
-    <div class="column is-two-third">
-        <p class="title is-5">Detected Licenses</p>
-    </div>
-    {% for license in detected_licenses.license_matches %}
-    <article class="panel is-info">
-        <div class="panel-heading py-2 px-4">
-            <p>{{ license.license_expression }}</p>
-        </div>
-        <table class="license-table table is-striped is-bordered is-hoverable is-fullwidth">
+<div class="modal license-details-modal">
+  <div class="modal-background"></div>
+  <div class="modal-card" style="margin-top: 10vh">
+    <header class="modal-card-head">
+      <p class="modal-card-title">{{ license.license_expression }}</p>
+      <button class="delete license-details-close-modal" aria-label="close"></button>
+    </header>
+    <section class="modal-card-body is-4by4">
+      <table class="table is-striped is-hoverable is-fullwidth is-size-6">
             <tbody>
                 <tr>
                     <td><strong>Score</strong></td>
                     <td>{{ license.score }}</td>
+                </tr>
+                <tr>
                     <td><strong>Matched Line(s)</strong></td>
                     <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                </tr>
+                <tr>
                     <td><strong>Rule Identifier</strong></td>
                     <td>
                     {% if license.rule_text_url %}
@@ -26,10 +29,15 @@
                 <tr>
                     <td><strong>Matcher</strong></td>
                     <td>{{ license.matcher }}</td>
+                </tr>
+                <tr>
                     <td><strong>Match Coverage</strong></td>
                     <td>{{ license.match_coverage }}</td>
+                </tr>
+                <tr>
                     <td><strong>Matched Length</strong></td>
                     <td>{{ license.matched_length }}</td>
+                </tr>
                 <tr>
                     <td><strong>Key(s)</strong></td>
                     <td>
@@ -37,13 +45,22 @@
                         <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
                     {% endfor %}
                     </td>
+                </tr>
+                <tr>
                     <td><strong>Rule Relevance</strong></td>
                     <td>{{ license.rule_relevance }}</td>
+                </tr>
+                <tr>
                     <td><strong>Rule Length</strong></td>
                     <td>{{ license.rule_length }}</td>
                 </tr>
             </tbody>
         </table>
-    </article>
-    {% endfor %}
+    </section>
+    <footer class="modal-card-foot">
+      <button class="button is-outlined  has-text-weight-semibold">
+        {% include 'scantext/includes/license_report.html' with license=license %}
+      </button>
+    </footer>
+  </div>
 </div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_report.html b/scantext/templates/scantext/includes/license_report.html
index 4d7fa8708..9c11410c2 100644
--- a/scantext/templates/scantext/includes/license_report.html
+++ b/scantext/templates/scantext/includes/license_report.html
@@ -1,3 +1,2 @@
-<a href="https://github.com/nexB/scancode.io/issues/new?labels=bug&title=SCIO+License+Detection+Error&body=Scancode.io+detected+incorrect+or+false+license%0A```python%0A{{ license }}```%0A" target="_blank">
-    <span class="tag is-danger is-light">Report <i class="fa fa-flag ml-2"></i></span>
-</a>
\ No newline at end of file
+<a class="has-text-danger" href="https://github.com/nexB/scancode.io/issues/new?labels=bug&title=License+detection+error+as+`{{ license.license_expression|pprint }}`
+&body=Detection+level+details%0A```python%0A{%0A%20%20%20%20score+:+{{ license.score }}+%0A%20%20%20%20start_line+:+{{ license.start_line }}+%0A%20%20%20%20end_line+:+{{ license.end_line }}+%0A%20%20%20%20matched_length+:+{{ license.matched_length }}+%0A%20%20%20%20match_coverage+:+{{ license.match_coverage }}+%0A%20%20%20%20rule_identifier+:+{{ license.rule_identifier }}%0A}%0A```+%0A%0AMatched+Text%0A```%0A{{ license.matched_text }}%0A```+%0A%0AInput+Text%0A```%0A{{ license.matched_text }}%0A```" target="_blank">Report on Github</a>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_chart.html b/scantext/templates/scantext/includes/license_summary_chart.html
deleted file mode 100644
index 13ddf0a54..000000000
--- a/scantext/templates/scantext/includes/license_summary_chart.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<div class="mb-5 mx-1">
-   	<div>
-       	<p class="title is-5">Detected License Expressions</p>
-		<div id="pieChart"></div>
-	</div>
-	<div>
-       	<p class="title is-5">License Expressions Scores</p>
-		<div id="barChart_1"></div>
-	</div>
-</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index b54ee6025..7587b1389 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,62 +1,57 @@
 <div class="columns mb-5 mx-1">
-    <div class="column is-two-third">
-        <p class="title is-5">Input Text</p>
-        <div class="license-match" style="font-size: 18px; white-space: pre-wrap; max-height: 70vh; overflow: scroll;">{% for token in detected_licenses.license_tokens %}<span class="matched{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
-    </div>
-    <div class="column is-one-third">
+    <div class="column is-one-third" style="max-height: 50vh;">
         <p class="title is-5">Detected Licenses</p>
-        <div class="card is-shadowless">
-            {% for license in detected_licenses.license_matches %}
-            <div class="licenses-card">
-                <div class="card-header is-flex is-justify-content-space-between">
-                    <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
-                    <div class="is-flex is-justify-content-row is-align-items-center">
-                        <p class="lines tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
-                        <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                        <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
+        <div class="panel is-light is-shadowless">
+        {% for license in detected_licenses.license_matches %}
+            <div class="panel-block licenses-card is-flex is-justify-content-space-between p-0">
+                <div class="card-header-title">{{ license.license_expression }}</div>
+                <div class="is-flex is-justify-content-row is-align-items-center">
+                    <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
+                    <div class="dropdown is-hoverable">
+                        <p class="card-header-icon">
                             <span class="icon">
                                 <i class="fa fa-angle-down" aria-hidden="true"></i>
                             </span>
                         </p>
-                    </div>
-                </div>
-                <div class="card-content is-hidden">
-                    <div class="content">
-                        <div class="is-flex is-justify-content-space-between">
-                            <p class="title is-5">Details</p>
-                            {% include 'scantext/includes/license_report.html' with license=license %}
-                        </div>
-                        <div class="table-container mt-3">
-                            <table class="table is-striped is-narrow is-hoverable is-fullwidth">
-                                <tr>
-                                    <td><strong>License Expression</strong></td>
-                                    <td>{{ license.license_expression }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Score</strong></td>
-                                    <td>{{ license.score }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Line(s)</strong></td>
-                                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Length</strong></td>
-                                    <td>{{ license.matched_length }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Coverage</strong></td>
-                                    <td>{{ license.match_coverage }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matcher</strong></td>
-                                    <td>{{ license.matcher }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Rule Identifer</strong></td>
-                                    <td>{{ license.rule_identifier }}</td>
-                                </tr>
-                            </table>
+                        <div class="dropdown-menu">
+                            <div class="dropdown-content py-0">
+                                <table class="panel-block table is-striped is-size-6 is-hoverable mb-0" style="white-space: nowrap;">
+                                    <tr>
+                                        <td><strong>License Expression</strong></td>
+                                        <td>{{ license.license_expression }}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Score</strong></td>
+                                        <td>{{ license.score }}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Matched Line(s)</strong></td>
+                                        <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Matched Length</strong></td>
+                                        <td>{{ license.matched_length }}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Matched Coverage</strong></td>
+                                        <td>{{ license.match_coverage }}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Matcher</strong></td>
+                                        <td>{{ license.matcher }}</td>
+                                    </tr>
+                                    <tr>
+                                        <td><strong>Rule Identifier</strong></td>
+                                        <td>{{ license.rule_identifier }}</td>
+                                    </tr>
+                                </table>
+                                <div class="panel-heading py-2 is-size-7 is-dark is-flex is-justify-content-space-between has-background-white">
+                                    <span class="license-details-btn has-text-link" style="cursor: pointer;">View more details</span>
+                                    {% include 'scantext/includes/license_detail_modal.html' with license=license %}
+                                    <!-- report license hyperlink -->
+                                    {% include 'scantext/includes/license_report.html' with license=license %}
+                                </div>
+                            </div>
                         </div>
                     </div>
                 </div>
@@ -64,4 +59,9 @@
             {% endfor %}
         </div>
     </div>
-</div>
+    <div class="column is-two-third">
+        <p class="title is-5">Input Text</p>
+        <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="matched{{ token.match_ids.0 }}" 
+            title="{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
+    </div>
+</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_editor.html b/scantext/templates/scantext/includes/license_summary_editor.html
deleted file mode 100644
index c7752c6fa..000000000
--- a/scantext/templates/scantext/includes/license_summary_editor.html
+++ /dev/null
@@ -1,67 +0,0 @@
-<div class="columns mb-5 mx-1">
-    <div class="column is-two-third">
-        <p class="title is-5">Input Text</p>
-        <div id="editor" style="height: 70vh; border: lightgrey 1px solid; position: sticky; top: 2vh;">{{ text }}</div>
-    </div>
-    <div class="column is-one-third">
-        <p class="title is-5">Detected Licenses</p>
-        <div class="card is-shadowless">
-            {% for license in detected_licenses.license_matches %}
-            <div class="licenses-card">
-                <div class="card-header is-flex is-justify-content-space-between">
-                    <div class="card-header-title" title="{{ license.short_name }}">{{ license.license_expression }}</div>
-                    <div class="is-flex is-justify-content-row is-align-items-center">
-                        <p class="lines tag is-6 mx-1 is-light is-info">{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}</p>
-                        <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                        <p class="card-header-icon" data-key="{{ license.key }}" data-startline="{{ license.start_line }}" data-endline="{{ license.end_line }}">
-                            <span class="icon">
-                                <i class="fa fa-angle-down" aria-hidden="true"></i>
-                            </span>
-                        </p>
-                    </div>
-                </div>
-                <div class="card-content is-hidden">
-                    <div class="content">
-                        <div class="is-flex is-justify-content-space-between">
-                            <p class="title is-5">Details</p>
-                            {% include 'scantext/includes/license_report.html' with license=license %}
-                        </div>
-                        <div class="table-container mt-3">
-                            <table class="table is-striped is-narrow is-hoverable is-fullwidth">
-                                <tr>
-                                    <td><strong>License Expression</strong></td>
-                                    <td>{{ license.license_expression }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Score</strong></td>
-                                    <td>{{ license.score }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Line(s)</strong></td>
-                                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Length</strong></td>
-                                    <td>{{ license.matched_length }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matched Coverage</strong></td>
-                                    <td>{{ license.match_coverage }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Matcher</strong></td>
-                                    <td>{{ license.matcher }}</td>
-                                </tr>
-                                <tr>
-                                    <td><strong>Rule Identifer</strong></td>
-                                    <td>{{ license.rule_identifier }}</td>
-                                </tr>
-                            </table>
-                        </div>
-                    </div>
-                </div>
-            </div>
-            {% endfor %}
-        </div>
-    </div>
-</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_header.html b/scantext/templates/scantext/includes/license_summary_header.html
index 916c5d013..b9f83fa87 100644
--- a/scantext/templates/scantext/includes/license_summary_header.html
+++ b/scantext/templates/scantext/includes/license_summary_header.html
@@ -12,9 +12,6 @@
         <div>
             <p class="heading">License Expressions</p>
             <p class="title">
-                <!-- TODO: clean up this from views -->
-                <!-- licenses are every license inside a match ex: MIT, Apache-2.0 are 2 licenses -->
-                <!-- license_expressions are all the rules ex: "MIT OR Apache-2.0" is one expression -->
               <span>{{ detected_licenses.license_matches|length }}</span>
             </p>
         </div>
@@ -27,4 +24,4 @@
             </p>
         </div>
     </div>
-</nav>
+</nav>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_scan_form.html b/scantext/templates/scantext/license_scan_form.html
index 0b62ef259..0c55122d8 100644
--- a/scantext/templates/scantext/license_scan_form.html
+++ b/scantext/templates/scantext/license_scan_form.html
@@ -52,4 +52,4 @@ <h1 class="title is-4">Scan License</h1>
     }
 
 </script>
-{% endblock %}
+{% endblock %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index f538b1e96..650065694 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -4,6 +4,15 @@
 {% block extrahead %}
 <style>
     {% for mcolor in detected_licenses.match_colors %}{{ mcolor }}{% endfor %}
+
+    .license-match {
+        font-size: 18px;
+        white-space: pre-wrap;
+        max-height: 80vh;
+        overflow: scroll;
+        border: 1px solid #efefef;
+        border-radius: 4px;
+    }
 </style>
 {% endblock %}
 
@@ -11,130 +20,37 @@
 <div class="container is-widescreen">
     {% include 'scanpipe/includes/navbar_header.html' %}
 
-    <section class="mx-5 mb-0">
-        <div class="is-flex is-justify-content-space-between">
-            <h1 class="title is-4">License Detection Summary</h1>
-            <a href="{% url 'license_scan' %}" class="button is-link">New Scan</a>
-        </div>
-    </section>
-    <div class="tabs">
-        <ul class="nav">
-            <li class="is-active"><a>Summary In Ace</a></li>
-            <li><a>Summary with Highlighted Text</a></li>
-            <li><a>License Details</a></li>
-        </ul>
-    </div>
-
     {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
     <hr class="mx-1">
 
     <section class="tab-container">
-        {% include 'scantext/includes/license_summary_editor.html' with detected_licenses=detected_licenses %}        
-    </section>
-
-    <section class="tab-container is-hidden">
         {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
     </section>
 
-    <section class="tab-container is-hidden">
-        {% include 'scantext/includes/license_details.html' with detected_licenses=detected_licenses %}
-    </section>
 </div>
 {% endblock %}
 
 {% block scripts %}
-<script src="{% static 'ace-1.9.5.min.js' %}" crossorigin="anonymous"></script>
-<script src="{% static 'billboard-3.0.1.pkgd.min.js' %}" crossorigin="anonymous"></script>
 <script type="text/javascript">
-    let editor = ace.edit("editor", {
-        mode: "ace/mode/text",
-        autoScrollEditorIntoView: true,
-        wrap: true,
-        readOnly: true,
-        showPrintMargin: false,
-        highlightActiveLine: false,
-        highlightGutterLine: false,
-        fontSize: 15,
-        foldStyle: "manual",
-        fontFamily: "SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace",
-    });
-
-    const cards = document.querySelectorAll('.card-header-icon')
-    const tabContent = document.querySelectorAll('.tab-container')
-
-    document.querySelectorAll('.nav li').forEach((li, index) => {
-        li.addEventListener('click', (event) => {
-         if (!li.classList.contains('is-active')) {
-            document.querySelectorAll('li').forEach(listItem => {
-                listItem.classList.remove('is-active')
+    const detailBtns = document.querySelectorAll('.license-details-btn')
+    const modalCards = document.querySelectorAll('.license-details-modal')
+    const closeModalBtns = document.querySelectorAll('.license-details-close-modal')
+
+    detailBtns.forEach((btn, index) => {
+        btn.addEventListener('click', (e) => {
+            e.preventDefault()
+            modalCards.forEach(modalcard => {
+              modalcard.style.display= 'none'
             })
-            li.classList.add('is-active')
-         }
-
-         tabContent.forEach(section => {
-            section.classList.add('is-hidden')
-         })
-         tabContent[index].classList.remove('is-hidden')
+            modalCards[index].style.display= 'block'
         })
     })
 
-    cards.forEach(card => {
-      card.addEventListener('click', (event) => {
-        event.preventDefault()
-        content = card.parentElement.parentElement.parentElement.querySelector('.card-content').classList
-        lineTag= card.parentElement.querySelector('.lines').classList
-        if (content.contains('is-hidden')) {
-            cards.forEach(eachcard => {
-                eachcard.parentElement.querySelector('.lines').classList.remove('is-hidden')
-                eachcard.parentElement.parentElement.parentElement.querySelector('.card-content').classList.add('is-hidden')
-            })
-            content.remove('is-hidden')
-            lineTag.add('is-hidden')
-        } else {
-            content.add('is-hidden')
-            lineTag.remove('is-hidden')
-        }
-
-        let key = card.getAttribute('data-key')
-        let start_line = card.getAttribute('data-startline')
-        let end_line = card.getAttribute('data-endline')
-
-        setDetectedValues(key, start_line, end_line);
-      })
+    closeModalBtns.forEach((btn, index) => {
+        btn.addEventListener('click', (e) => {
+            e.preventDefault()
+            modalCards[index].style.display = 'none'
+        })
     })
-
-    // Range(startRow, startColumn, endRow, endColumn)
-    const Range = require("ace/range").Range
-
-    let setDetectedValues = (key, start_line, end_line) => {
-      let annotations = [];
-      removeAllMarkers();
-
-        // Indexes a 0-based in ace.js
-        let start_row = start_line - 1;
-        let start_column = 0;
-        let end_row = end_line - 1;
-        let end_column = 10000;
-
-        let range = new Range(start_row, start_column, end_row, end_column);
-        editor.session.addMarker(range, "ace-marker", "line");
-        annotations.push({
-          row: start_row,
-          column: 0,
-          text: key,
-          type: "info",
-        });
-
-      editor.getSession().setAnnotations(annotations);
-      editor.renderer.scrollToLine(start_row);
-    }
-
-    function removeAllMarkers() {
-      let session = editor.getSession();
-      let markers = session.getMarkers();
-        for (const [key, value] of Object.entries(markers)) {
-          session.removeMarker(value.id);
-        }
-    }
 </script>
-{% endblock %}
+{% endblock %}
\ No newline at end of file

From 5c883673e58654624cd178b539708942ae404a18 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 7 Sep 2022 20:22:42 +0530
Subject: [PATCH 47/59] Remove unused tests and validate code format #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/tests/data/licenses                  |    5 -
 .../tests/data/matched_text/binary_text/gosu  |  Bin 1712 -> 0 bytes
 .../binary_text/rules/gpl-3.0_rdesc_1.RULE    |    1 -
 .../binary_text/rules/gpl-3.0_rdesc_1.yml     |    2 -
 .../tests/data/matched_text/ffmpeg/ffmpeg     |  Bin 6136 -> 0 bytes
 .../tests/data/matched_text/ffmpeg/ffmpeg.exe |  Bin 16136 -> 0 bytes
 .../data/matched_text/ffmpeg/libavsample.lib  |  Bin 1783 -> 0 bytes
 .../index/rules/gpl-2.0_bare_single_word.RULE |    1 -
 .../index/rules/gpl-2.0_bare_single_word.yml  |    3 -
 .../index/rules/gpl-2.0_or_apache-2.0_2.RULE  |    3 -
 .../index/rules/gpl-2.0_or_apache-2.0_2.yml   |    6 -
 .../matched_text/index/rules/mit_101.RULE     |    2 -
 .../data/matched_text/index/rules/mit_101.yml |    5 -
 scantext/tests/data/matched_text/query.txt    |    4 -
 .../tests/data/matched_text/spdx/query.txt    |   12 -
 .../tokenize_matched_text_query.txt           |    1 -
 .../data/matched_text/turkish_unicode/query   |   20 -
 .../turkish_unicode/rules/rule1.RULE          |    1 -
 .../turkish_unicode/rules/rule1.yml           |    1 -
 .../turkish_unicode/rules/rule2.RULE          |    2 -
 .../turkish_unicode/rules/rule2.yml           |    1 -
 .../turkish_unicode/rules/rule3.RULE          |    1 -
 .../turkish_unicode/rules/rule3.yml           |    1 -
 .../turkish_unicode/rules/rule4.RULE          |    1 -
 .../turkish_unicode/rules/rule4.yml           |    1 -
 .../data/matched_text/unicode_text/main3.js   |    1 -
 scantext/tests/test_match_text.py             | 1490 -----------------
 scantext/tests/test_views.py                  |    6 +-
 scantext/views.py                             |    8 +-
 29 files changed, 10 insertions(+), 1569 deletions(-)
 delete mode 100644 scantext/tests/data/licenses
 delete mode 100644 scantext/tests/data/matched_text/binary_text/gosu
 delete mode 100644 scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
 delete mode 100644 scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
 delete mode 100644 scantext/tests/data/matched_text/ffmpeg/ffmpeg
 delete mode 100644 scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe
 delete mode 100644 scantext/tests/data/matched_text/ffmpeg/libavsample.lib
 delete mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
 delete mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
 delete mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
 delete mode 100644 scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
 delete mode 100644 scantext/tests/data/matched_text/index/rules/mit_101.RULE
 delete mode 100644 scantext/tests/data/matched_text/index/rules/mit_101.yml
 delete mode 100644 scantext/tests/data/matched_text/query.txt
 delete mode 100644 scantext/tests/data/matched_text/spdx/query.txt
 delete mode 100644 scantext/tests/data/matched_text/tokenize_matched_text_query.txt
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/query
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
 delete mode 100644 scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
 delete mode 100644 scantext/tests/data/matched_text/unicode_text/main3.js
 delete mode 100644 scantext/tests/test_match_text.py

diff --git a/scantext/tests/data/licenses b/scantext/tests/data/licenses
deleted file mode 100644
index ea5c84937..000000000
--- a/scantext/tests/data/licenses
+++ /dev/null
@@ -1,5 +0,0 @@
-Apache-2.0
-
-MIT
-
-Lesser GPL
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/binary_text/gosu b/scantext/tests/data/matched_text/binary_text/gosu
deleted file mode 100644
index 61b925fe68c8f0f791a440e370e9b61dfc0684ae..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1712
zcmbVM%WD%s7@t108fhC8@j6gwRcJHsnJrRsX)Rc=BG!WlO_OaBOtYoC@j)rgv3S*E
z4<35;;HmB1gU2GG$Mz!hk5F)CXJ*|5q2R#I?|c2e@0-WY?)=ii;J^UzV-U;%qn+%|
zr>3%VeEAy!%0Ph;J&%DQa9R6@_2=WtPV%_tcpgE~CNv!L?T=f1f4rOu9&6SNJepWD
zOu9>{<fpO|ES`?q0v>%m!%K4%T?55s%X8ShcO%t3=6Pm~V0psEFIRi8yJ8o(7Ytdu
zOaJI+QcHQ(df|M<MRacthv4po;aPy^WrqJ0%qq;6^;|65Om01=zuiGutKS}{0q{^u
zhCk>n+5XxDbMHsICq>w!0npXD+rb`B`QTQnzwNQwT6xn`t>o*U9T*D$XO3UHo~qdd
zGsy}S4SD|Jq2(_g+WH@iGdKG8{e|}B`R7NLe|}{7Ich5Qr30NlJ~*~~aBTVAch`DS
zz#a{NDaX(DFuSJ{X=H!AJJu|VIUUoJ=R?Pb_TE#(5Jv(@q)>nn#+YD=8Rl4Ei4_hA
zBA5_D2_u{cB8egaMHEv)DP@#XK_yi*V2EKx7-fucCYWT31srkA38$QK&IOlT@jxJf
z1rby*!G#b~C=o~`u_Tg8Cb<++N+kn@6jnr0#S~XUNu^W}=oSNAKG3NHO$)TC@FcF(
z!&$h!4QC#Qt#!Dw13PiJH63q;RalKSHY)8Jd{)`2$IlB8!m!>On=~u!s14UDal^RH
z$fs&YYf<gZ>{b*dX$+jM!l_q{sIt+7lasJnoB9_EBZOwL_7)2HWG}<5xEZx=11nb1
z{r1GPn$@r!>jm9kUYe%x_Ijt)f=T!~ft3U{l4LWUEtTrcq|sTM(W@)j{!8^J?%XK=
zu=>^fumEfxFIfEbAjOkUODV!{RzLe;vH0ETAFaRW;x-+gtXjLj*8Yk1&w(zP4u8Ey
Woj&H!KRf=B@X!5K=P!S@{enMJslW9A

diff --git a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
deleted file mode 100644
index 3c0984a8e..000000000
--- a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.RULE
+++ /dev/null
@@ -1 +0,0 @@
-License: GPL-3
diff --git a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml b/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
deleted file mode 100644
index 8f2188c97..000000000
--- a/scantext/tests/data/matched_text/binary_text/rules/gpl-3.0_rdesc_1.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-license_expression: gpl-3.0
-is_license_tag: yes
diff --git a/scantext/tests/data/matched_text/ffmpeg/ffmpeg b/scantext/tests/data/matched_text/ffmpeg/ffmpeg
deleted file mode 100644
index c06345a809dba9dab8c95e37505411701fdd65c7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6136
zcmeHL-EQN!71m6D(E1g+7sM9sWU!HJXR<+d23d@g*fRza+pwM44vHe6B+6z-5>-)h
z?7^b@4t<J6ulojlkv>8%_p;w1rP#`5+7}oFvBbl}!^1=Jd>{G4;A;5v$rGo3o;u$;
zRQmTnUK^49b)190|H72%=e2X{{1(4oIKOj#jq<OIjB2Rvb>sb0Wx9U<#?(3vS^Fja
zPoG-B>C@Ndqkey3#puI6wTJYk{&vsn>eIge`Of-Tza58}dTK|${utQ$AA`Wi>-VK=
ziq20R`>xwL49|s15|uKsDMYHJO7%<D)?UlGE;k~_C;L-d|M_Lh=w8c2uwt8uN{8BC
ziinqS!PYVqiUleZ!6UW23}v%+tXL%D0)3<$R&^!<8A%bM>znjLs_xRd{#jc;gLG)M
z+uHgvO0%-im1FSZ_wA7SWe08j+-i__sY(Sq(5lP>QQ50%CuPwzQ0vO{!%nl~MNyK8
zr868}PX}*qFZ+xB<!I)-om>x`PO0;bUh!OXDy_C|>rPv{h+q_DxzM6;D%noZ&vU+I
zg;LCyOAPEnm2rsP3&!JE1<Ws_NaUypZD<jXTm)*F%8w#sDl0HcjDpf~=2<L@LZ(Yr
ztOSCeBs>jcnF_R?2_90-UIXTNp2HGe1S?qL+RpT+%)>^^*O7FVvGRFbt(|^ZC^r;^
z2nyJ+k<~0xv{o=1&IM0tD6!N<UF)$iBr?fjk%$z{V7aM>nCnNi9LIGL$4G9@DB8?*
zu~2jFu0>9Z^{iX^Wm=S7uZJqXlyT^4gb(Vjof3!<={Q(%nVw;)LXpC4*Nvqg%G?<3
z;JdRqJ-IzOhBz%Z?lLW*BGcer1>bQQipSJ~;4BEB9&9#e%dft|mq@uE3wN2tyMkFJ
z)Nv@asnta;vy3KaC&S*s{>jhakjg~*7c9KTYjv>7m7wXx{<-gvud(7dg?V2l^)@Y5
zI|&9?#Clhbd8jkKSC&54`}NZI<C2!_eMj)m*GGOs5b&UB1i@1cjR<RX&odZ_R9XZo
zjU)}TiW-S?At=i{5K3Q$vLRa{aqKr->ZDOlRG7tO(|D4-h$-Rhx7!Q$&I^^5%_wAL
ztefrfBFP$t9-kKsNw2Ueo2n_#vSv)R+U(6HwaSFpG+XJtnS=(my~>+$FYeUO2Dnza
zFPo!TXHCm@e4X*&L!-Al;Wx*(IXd~ep|L^QZwNlF{k=J&!1BFaK;`9LdDH7HbKYD?
zVbFW=&2hu16v0XF^z=nT_%ZqGu8bukyUpOi*QOQfJXqHdIOH-$CMXokc#e4;4rhbu
zWVT=;UCUghl&RR7=aP0SAq#RAJ-`j7B9@3m<y+)xq#WABxyV#r5O4?sXqC3(?E0bp
zYrQYq`b%pRkfjTrQ!+(bZ+DMFx7|ImKa3ZU9EC6Me_Au!&s~nZ4|8yzOrbZ*zU&<m
zyyvkq_KwiF#rk!*Px#wn^SV4Q{X!P8XfbbCH5umE7H%xoB4!B?T^6g{LI{++GNbXq
zNo3MAg-Xn~iU1gyoG{-262ls^&<($aLjul2gs1?xA%F%XBv39FjIR-XimX@Zu|zIc
zP!x7fe1S*-&oVPfckqSrFq8y+SWiS&a{9my$T3BN2+IRM2!!@v)8AGdk*0lS@P7{=
z;UK*B6}mDkh5<?u2x37PtH|%$T}s$3;w>vRHP?4K;F$Fg<j69p4$u%FTOqHesE{|h
zj^QCgiso2zCH@JxUCn@>ZYDRkQF7m|?r@Z@d5moqtEHVCnsJX!mDUmwWJah>Skp4`
zfsC*QD9)7oZ4~pRW`AG@-OX_q5Br{VH>dRV+yV8=wz&!WLGYZlvF5NJ;EtU+7Sd9{
zqRq99*Ag)IwRh}w*};uLU0ZTbEC%=f);saK$6oK~IXmk1PP!-EZ%o6swmBXcLi6_$
zyY@4rTN1Db_>V)!XGs4H=|K6<kPc*${wE;4N|bH=U4J$njo-YiaAAu?PUNymJb)N&
z&OZ10Fxhr{J)DD%La+V8{Ikw}iBtg$$lyduaiEEcJK)zV2JmA6Cp-tC2bBlP2}a<S
zh8^T`Fi#n6TM(2f7`;iWz^nwRIWmPe;~Vw{XOSEzHZA>F2JA`(01#lKW*JE|9n$=5
z%cNxLeO~p>h6+|qGJVAaun`?p2O@S<cTib71QxUo07h^)uMp`uyxaoR=<r>&^I9!C
z%-v{0Cc!TN2P<FzK41gGxs!3sfX;xfsEiL=gzn#s7H=mv3)UaMXPAOnf4q4A3Yx_V
z2+M#X)(#y2almH?V9wKG3r}0ugW1Jf==aY@SEI#yg2CZvF&@n4Y&e;*KAZMui_yi+
zRe#2&H?!$vKJXZuBN*_k^*@402U0LJz_KVnH9!zr?=fW>F5$4f;%h@Ngj~ZRj&%Ym
z{(ont#bX7)H*BP>UojaGt{*ZYV{k~BXbtygs<oS`LpDkS?~r}nV_;h82b?t7yr7fL
zAsfmFtYa7}l^?Qmr3;lFvg<zU9`$-%xA&skV>k1D%?Ch5*(H&@O96UfC`ipLz|;n)
zz}?LQvFp-31K@^O;$_VOFX_DbK+_vkd~lFHQt1)HtofzoNyH9`XF_UN=1xj%6VjS%
z?kc>IsTid=%41^@?^G!WmEm1v&-1RR<rOFnNfWwCq4U12^X64u3~964wT}taypD=x
z<yo!@6{r{yFn33ESz#V~u|$Dk3**-UOpY#5YQ}~NZ*E3{ZAvTeo)^Q_3;05JXv7vP
zh6nvVq&6Co?x{N12za659V!et$L?xWj8Sp9#a$ROfu(VEE-^=8EY1fFzDI+(z1YH9
z2NkWat?F@2<pw<6a#0#Ig_YXehsA_}RT_q!BVSetq2)~;s!z-qKBexxHP+waiiI*H
z<3S)|q&GS<SRYz2I3*9JpvySD?907#$j@bgR4RB<HyB>^-^|&#e?6ePy!siR-(F0v
zr<3tuyqL4|(PDl(9n5a0qdyL=T4w%j1lmFU$B^Yubk)8c)`sYf%wNZAl!g+=*%BGK
zI^-Z~4?4db%r9o6>0&e)*Ug;PyujpR+;y}2v(e968^`CPMtgFjzD3p5xy){b>LRsT
zr=+KwE3jU1f(AkL^n}$aItT?$oc0dTW?*|2NcW@kELH?-_EvQT(5Y6hSqMeF1+dsv
zxm=ml_RL<6JgZ=&K1tQ*lWOkjo7}wi{^9**b#$f-T{WrvstjIB>~-W>o-fPfUv}{S
W`N6#H{q~dk!ra!GJN@d@y#5O*7h!Dx

diff --git a/scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe b/scantext/tests/data/matched_text/ffmpeg/ffmpeg.exe
deleted file mode 100644
index 5a9b37bdb1d6e965201cbd5d028ae82b9333203f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16136
zcmeI3OK%)kcE>Me1BrX$L4bG>WRT32koJ%cyGcp5<4_jIG%3mi^dLyemZK4*>Z;q_
zWz|dd=xz$eAd9R51n?psAxN^w#=syLWHn!4fP8~EK$da7K#(ZEb8l5YD2-^(x|{5-
zs{4Na&;Oiz>h@oLrCw7?z0UXfbESUCRX!it-&g<q712NZ@xT61{o;*ZU;O3DtzTa}
z8HR<<@^p}UvG%<rNlV=`I<FEPCVKDgv5wQgbl-fl^`~{whc}eEwep7g>!)8|XlnaL
zZM^n_l?y*o|M!n8>e7!csK5C;_03zv{yty*Pb;={a!(FvHv#s_MX7(I90LFLFaG$#
zYw9DSw96-%B@dta;vOD<ec=yx-R*Dl`}(iQ|B{UV`Nh{4zIx*&5p__vG?SA1f4=z-
zE4D$OaFx!?eIBc?9(VK9ru?H{(3Que`4_IYxXyjFI(*y>ywW2q`1{w@PF1Fzz?8->
zO`wY~c~a<pn(KD4)gsBRo0ljoN*CGHcbKOz&LR_=ggo8WOA@tIk^38d98_f(6<uG2
z`ucU9^d9+4sb8p_qA+<GrU~`vz86N+?Uhn#J7_80>8R|<pyQ`WKOFpA{ak6STJ6F;
zSGgxSX<oaKKWV1wbWGy)BGVaU5!vg9k;w~^UY&&mlaY62_85h|xIdXaCnJ;ivqx_f
z7N<!$oQ3qBk!AMSPm|KMYA!4{;buO29wb#66|?7%45Qg2<$1+iG|#<=KAtWl=@nPz
zswIu@`QAc`er`;8nwf>bL44-9h!^6+s5jSLie<}XQ8idd6lYf>?@U2y#z3XH>3DIT
zEVgVuYW!l<@XGt|E)*~6zsSsDooQArj!9m|*+RA=@v>}jkcxCNH}3SN$c&krT|&yC
zN%O^lE63>z-5Z5LQF@DwAEnaSac^OTP~<4hdm!0#(MeFYSnJpuWuE_JA<fwI7CSUK
zJ1_*iw~%GxjTY;f(9VSv&%$_q9G}H=U1j4viP+z>46>G0J5asQulTBd<P8d?+F7Sv
zcB6Elw$yd@*1p@HWvVkTFO1&bKe};v_vl2MWEAFUA~>K&ULMK@++q{ARG%HKA~Q0P
zI!H!dL^E@frFp3(M%$Q{x|cjj(s81nm{a|ecJULz{ibl)-KxF$ZqVMmV!xf{My~2^
zRqC{_V-u(OsqR%_6okoOp^}6(+fw8C_RCUh(p+j6mt13kASOE({1ZT;PFt7#v8Y6a
zTT-k2X&t$ajPtN8DUM;0qCmm4iUKWC%!{Jbr&0aB$$`fk?34f`QhTPp*;ie4tnRCi
z)!t%HES3rM>QB4!(bx5-VPMjh-f5n9svzX~xOqOVdSzHfrlq_4b(Vc3s_EE4)^QaP
zJ<N?K14N>(<fPo=DD;1GqV9d0XjkA`v%1YBGkGX8?L|^ZxBIazq{?Jg3mxR%_|Y)U
z!)K3rUJ|$@-94#ink38-G7NvhV8~>}rW~e$zT9F($_L<Y`+PfBHVamf(yLZlaJ!~t
zU~bR`E2WYu?wPz6Q;ajKDy@>h&8AW(L&I{6D$}GGds3br4?}+_osl^jGbk*#c3*e+
zvEx21KTJw6%1Oal@f5*aHf<oy%Tolkd{PKmGhO|%#%O)m4}B}W>b$PWop$H)pBLIz
zU@LJ8rdcnF%=|xS7Y`A7n$Jy>nB0qCO_|pVqp%E3v89zAe;IzQg8Z^pC0xqq-b6?f
zRm7}IE>*8D>!Z+$N!7|9iU8A!z3s*R&`SnJt7bP-hFv-&PSV8K%1@fbbhdh1td{Bv
zSzF3JD|hcfGi>g9Ik4AkG_stgGwHzwKt$J?^+JLyoR~-kD)vg7h9$9(pzwr-=%$R9
z#Z7aweik!JR};!-Y|=`~7lCHCDNEkn+)1@tfdVDX{8`4sW|xiOK&#LW#z2Do{TNBM
zZWm3G%B;yM3W0>&Xm13N3PyJ8bUF?P!%`=P&XQTkFIC5xr9-knwHE|TZkjOHP(YK*
z4WOL^wx$4D&a^`owiGc)!e^A(JFP1s<z=<WLg;(I0k}JtD9tPnlde$S?XZ9Y4ruS7
zvbZkoUr4f(3pXiK+8Yj}33KwHFd_(K=h=}tlegKQhe#1yhaTi<l?jf!8d$@2sZ;7*
z*D4-Y+5U6t0YX0t-FVrwV25S`?Eshgsg%o5N~%+qA`&GNN#RP9h(t5n5@rPvog1-K
zwGc(Gt8b?xqrruu$<Z_PL4v>%5nRk<&qQf5D5ym`zate%rF9iegWc>kgV?rnbzJ$r
zDT;m-(KaheA-yk*0i_0}R}EC(U6eYa#fw$QVo6yZZgt9zi>nEU$XYhJ<=$f<;6nwA
z8OzU59v{&cwvs$Iw`$LVGVQ7CloaE__vmLJP%qTE*l?Me8EW9OaHwvAb<Gy^U<DOl
z8K(||2Mc5KKI+HK&<k?rIpM63#lFzpV`kKaN`5v#OpjnHhPWdqx0>O#%L?J6G#&AW
ztOE<}G85;oT}ic=&q{&WT9?vEG@quPR$H%KiHj3k=U6Ov=hJ%i+FUQs;6_6h!Q9Ng
zBK`Mh(<WW`O*`BU)JYBw&8kH(8rHZKp+bOOoCMk9VG<TH-o+~>3yNo0Uk&4fK{^g1
z{HUuBQ=kR(bL_9MJJv~ZSusd9N28urp(&9@i4aos)JPdwIcn_X35`}^(oYrpwU-tK
z3Fjebnu=&=mUgx+jn!}K)y>Jfn|$<_dUNuD+^wmRay(QuJQD=9>kU{<42}316y#?U
z*xp19VP3*!khRl3#opwRqj$DbcHMQ@^n$!|O0gS9aOCc>5Lt(OwKmGL#T#~)zS_Og
zy&`K@@21&lZpFc>zlJ#Mw2=vH2PaW<AY2;y3X2^0zJ9O!Zg*3!-rU_?qd1|zj?sXh
zQs1JYJR|%|<#=98TPnv(<+xK@iePguQI2IVeYSIS=ituGt(te8BA~=7RkM639K0G)
z)KcC^j9uh_&0j7SlQ{tJMpj^xvX;c&h<z<onGm)xI8zH?uDNma7Dy{f%%nagL;YOg
z?JGgIebs*ThMs#8KmuPt9<c}MBJGy|r@3Z15=K_&VSp_!57C+|ok7%YAeXWGg#FVN
z{02n|CLz#6dt#uuZy#>nxi?ii4yzsnU*DqT2~r;soJok4553ctg)OQ5xUOCAi+X1D
zo@?5`YpI|HLw&WWpe}74S<qU=76_?pnCBX_?G$oDgPMHZ7WP2t=Bzh?m=C1xAvnnm
zWzd~*h|&YIhMqvY)?1=2es*y3$=!P=dgspPn&~~-xpVUQHOmEHS{kHv<p`b&)nov3
zFDXxHY3uflquozPzw`0It%H-#h3f1doZPu_e608H9_bx@xN~%JuzT;;&XGR6cXW97
z_(m6<hQXj^tyc^pD}qTt4r7Z&JVO{;pF?Ylp|vUuJ%}l!-Gn3IJS;9uU+2zD%M+^V
zNDdTK>w~@ss&!bH(H{=WGTYkN7>~zYOqAU;A8d%)#*7vl+YklhrMiLOM`F9>ijKtE
zYUw@E{Pzr$Q%J;=38M4gAma85!A`TsQj2PT`90BPTd20O?kVv=-4gj{^(nD??1^o`
zRlsI>p@KTnnpDn4cEx5w3#`S2A!1L99b%^|6{!FEgKt;V?<)0G|0DI^a?j^~>M&h=
zr2d!t-+AlXl{eq|*4n*L{%Z1PwZFu8xz%cfT!|7VYKA58Hq>qK<C@)CB1im87n*7`
zHcHrn<y($FG$6DT&r9*_PB8%brS1BB+qF^*R3keP&=$S6^v%OtjhqnqB6_jN)}OND
zZzKLZZO=&>$&GX<$EL=7yFHyOZVUyP7r@7z9YR!X5i#@0oQYvBm27IUcXdLrlv6H1
zvM$kPo3Cy@yeWGwtflUN6k@h>l}M7>d?5xUSOFRV%Db5LkqfKsU=85*!`!bT;HoYO
zOBa%{Ipb?7)aMOBzZXBA=c~g!MO<2~wZMvWy)7y}hZm+SxStjSir0zc7>F2m<mcwY
z+8H*Qv%hz;C*>n&4L|4Lujp)ZMhHDIUR-Q%DmvJ+C~re#7P8Eq5Yluo6a%ic_DC{k
z>~&d5*s^&UlUIb)2ahc!M^frwkYK*1Kn57GpCc&~MmUT;4jAfE>@h)!ZKO6Cf0sO%
zyQeu$#2^a~q82x#BEw~rrEAEGb`;TaE4dzFtuBq6yJEVljj(D8k4s509ST<5QjuFK
zat()ixgrN%%pudclHwvJ($4moeW>4lTe}EHhi5x<76THMsh>u#jO#_fsomMLlhvR?
z*8m!fwy_)2-QQCC7EP^%2}GJk)MQlcl%;1ap;_pc6;gpjQ_#~xl(Votffb-x*I|uw
z;5B{B&C|+Y4|Wjx;dQ+!Qeh9lk^!DTdtPO7Fj~XaYF+Ym0Bfx^wTjiDA*$oVf-hyv
z`UaZIfv9xORpVrVJsO#|bXM5Cll{&IqRTOraCjiRKH<dX&LPM(OrfX1J>uKiPCG<D
zLd0>d=+}(Br7ogsmy^ymr*!tvq_gene1s_KY>WGg{_;x>WL}Yo49J+osLGvEJJ%ID
zwrh`mHYe>#V+^nLPBHAWw2BfLj1}=gJ9sFAIYp`^XitY?3EE3~|DEZ*#o==?dtEm>
zRpG-@rWy1%hdpsnR;F?NAi)k)Y@}W-#vB!zU`^=a0FzsRfu+gv@`TQsk8^^!C9Df^
z%ME@Wpbr#O4%XY^xd1!f;M4(UlKWzzS0w9YK5c9l_GhDA@NaL44<>C6zoGA3*OY#6
zjP<CEyGfw;n!AU#E;%G$*H59_oDXof8*vH}=v~hqa2A9RP(<`YYW(CG74XkzYDDDz
z)3B&;$$91+5wM~8RZja$6p1w9XKK-L1Yh(W&nJxlIA@YT`IAPq5H&$;X2%D&_m;K{
z+-X9bE0%tEOFz8t?}uj@YO_txMZ3n8V_0uUHS8&LV|~_t1Lf94nIMqFisjW^x3-#l
zXXLC8D6m>QRUW*@ALePSPVo+st4~N&#Y8CUy?{A<fParIi}2epqU-JZ>)NKSpWr}&
zwnw+ZKy}kIph@VdsKD;LSN-bw^XLD*11{YZpc21fcG2woV`FxUA2DFl+0G&_!MuS-
zahNnu=U5hOzFt-|nYeXj`9p|Zg)>)OTJtPznbMUdbDIvulDRFJ+jnGcvyFQ$oTYxG
z(xT5dS0VQy_aXPCT=8Q9k5t8Xrc{cnnS(BI60%AxM97&*YrjCJrbDDePpg$+OHyX(
zfd%meJ1@f&l&p8uMq}6%g?iufODxRWD9*2bsbNz&bL^F4vG+;(I865KsqZwzrI<b*
zkB`M0E8mh++sYTr+`t-)1~0cRv9v+D*xBZ{rB>f@!G~$F_8@$yw?(5r8~CB7L!YS^
z!v9Qvp<SG4#}{2m4?_J;U6*YbiA1fPwe6Y7IqAYPmtg?tWr%1V=7lkSFSrer`?ix)
zYwHqMmn2an{E{U8mL#!p(me0Ht9`k9702!`@n}?LIMmKBt_H%NUWA{aVe;Sh(Iq=_
zwm*Hz!M^E0xV&n?PbJUp#|BpFghLR~kDbbEU9j;%<s~sJbG4(p0gkLCu}%XBTlG?m
zfn%`R9x+`Nx4ezZo12&AobLVE)BYizb4uN_Ha_fd9nN0xA<1jMaaW`D8n=*s_Do)m
zT%>*YI`9IQH`&s|;Z_`P6~%+CYu$%^@%Wcl`x*zlDZg1#z3EUasos+6eMhS2eBaKc
zc{Pu6si!pR;wATnm~pR^+5^!`?9JIn-gT81kDIT=$UX1$E_}tQN5Yz`eZ{!(ffE)e
zZyt#URUfU^9^_k(EZha|kHS6YGNf05&F&@#>G)sau-^EvO&!g^DLGZ)xOPBUVj2^j
zmdB9ypbGL}2o&hy6hPCK-{boYDz03ATtELu{ro39|A4Pt`K+w0*rvOu@BMp#<!&(G
zG{^R=p9;>x#I`i=MbM%+v-VAxqgeDHceH(gXTW+3-CR7gPjNtluDMh2#vUq^&bg$`
ztbv`;$O;pD)ZP)FGAYk$`g8AT;AWktkXCs^0N@rM!=ad36CA%wy!PbXIBbqBjb(sS
z8fzn%wlPiCt-~TGJ>-%>minI1K=;01aaxon?vnY0u6cEEhgXS8IbxDG%EjBw<;C+a
z(6>!2u^+roo>g&qqa+v5rPei4@3>*JXZ4)Q%j<o@vT-3~-B@SJRs2fC!9xE(suGPP

diff --git a/scantext/tests/data/matched_text/ffmpeg/libavsample.lib b/scantext/tests/data/matched_text/ffmpeg/libavsample.lib
deleted file mode 100644
index 27ff56d4f012ac718c6b649f40015f924af2db2f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1783
zcmb_c&2H2%5caA_pcN0{9#%cM+1+lpw4z8A3$#)T2nyHYb(~$Rj%{owoA$z`FT)G)
z0^Cb4TzLad@BmCU+r}vfs36iL^YhK;u|1RJ`_!`OgLZ_QPj7v5bFkT8>-L-e?!xSC
zbl0lTn>IqNj|iP%gwDVG7irzTK&{F5f0K6>wJtvVR_-E%KDQ8BLPvKHnt)tB??G8Y
zx8PlAW$5c1xx9d!FTPDog)VtN&Cw<2*&len&5;kcqC>5vi*ja;C($?)GWM2p7bQt*
z_#|?cMFZOF(P2V1*ZTdzaAW;mca=#_RV2mO%`A5`HIhfQy2{XwbKH8N74A5kP$7AY
zsRv`L6Sol|#_~iI+YxK5ZzwyaldIK_is69Fgo^dd5$CBFjHEHidxJhE#6#h1N`=}^
zUCzM46&*`Xs7q_(q_Se+=CN1@z?hG-Ne#7`@*?GgnJng>GY_H5yy`R>4kPJF(_!_P
z*K{;zI_AN0G>>^MSOb!1o5HcwwdaYDp4))fXy=U$>#h&FPINQJs4TfJOQyMSC(2I)
z*Z34}EU*RBDi3UkCh;+$w5<A)N1moMG{rbIYm%nnS}co0jLNwRDHQBHv{{ymIN;O<
z@ba3&xzGcpX_L7C<6>EM!Bv^ii;5Z(-bvY?x(E-m^cc>IW}=^RZNm)!EZ24{%Jmjq
zRl3Zzycx~Qj%|_^{o&vz&w5R?xC35zO9@LPow)54GTPmF{pje`)4i96qodvJ?g}Kp
zL=w26L`+BpBa0y@$-;DUFsfZeu>c>8P`TZ!0mTxUPdUDY_n$o9$2qsIoX`Gh4{M7h
zg|<VQNu*r@Vvzt{fG+0x6k=19fjq9J*=y{$$J&%r-bOF@+l)I8V=k1*Jg$BUc+ub;
za65?PR0^2?8mu#a19XGhLdEzmiGPLNo7V|{5T&?;{hvTBxLb6jEP`Uov&odpjL{Z$
Ox7tR}s*-P>*!}@@M?K8|

diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
deleted file mode 100644
index c0e32dd8e..000000000
--- a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.RULE
+++ /dev/null
@@ -1 +0,0 @@
-GPLv2
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml b/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
deleted file mode 100644
index d78d0c44d..000000000
--- a/scantext/tests/data/matched_text/index/rules/gpl-2.0_bare_single_word.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-license_expression: gpl-2.0
-is_license_reference: yes
-relevance: 80
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
deleted file mode 100644
index 995ec316a..000000000
--- a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.RULE
+++ /dev/null
@@ -1,3 +0,0 @@
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml b/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
deleted file mode 100644
index 41746474c..000000000
--- a/scantext/tests/data/matched_text/index/rules/gpl-2.0_or_apache-2.0_2.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-license_expression: gpl-2.0 OR apache-2.0
-is_license_notice: yes
-referenced_filenames:
-    - COPYING
-    - LICENSE.Apache
-notes: seen in RocksDB
diff --git a/scantext/tests/data/matched_text/index/rules/mit_101.RULE b/scantext/tests/data/matched_text/index/rules/mit_101.RULE
deleted file mode 100644
index 722e438fd..000000000
--- a/scantext/tests/data/matched_text/index/rules/mit_101.RULE
+++ /dev/null
@@ -1,2 +0,0 @@
-This source code is licensed under the MIT license found in the
-LICENSE file in the root directory of this source tree.
diff --git a/scantext/tests/data/matched_text/index/rules/mit_101.yml b/scantext/tests/data/matched_text/index/rules/mit_101.yml
deleted file mode 100644
index ca1a71366..000000000
--- a/scantext/tests/data/matched_text/index/rules/mit_101.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-license_expression: mit
-is_license_notice: yes
-relevance: 100
-referenced_filenames:
-    - LICENSE
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/query.txt b/scantext/tests/data/matched_text/query.txt
deleted file mode 100644
index d5dc1521d..000000000
--- a/scantext/tests/data/matched_text/query.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-#  This source code is licensed under both the Apache 2.0 license (found in the
-#  LICENSE file in the root directory of this source tree) and the GPLv2 (found
-#  in the COPYING file in the root directory of this source tree).
-#  You may select, at your option, one of the above-listed licenses
diff --git a/scantext/tests/data/matched_text/spdx/query.txt b/scantext/tests/data/matched_text/spdx/query.txt
deleted file mode 100644
index 0ef045154..000000000
--- a/scantext/tests/data/matched_text/spdx/query.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-@REM ## @file
-@REM # Makefile
-@REM #
-@REM # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
-@REM # SPDX-License-Identifier: BSD-2-Clause-Patent
-@REM #
-
-@echo off
-setlocal
-set TOOL_ERROR=0
-SET NMAKE_COMMAND=%1
-SHIFT
diff --git a/scantext/tests/data/matched_text/tokenize_matched_text_query.txt b/scantext/tests/data/matched_text/tokenize_matched_text_query.txt
deleted file mode 100644
index f4d5c8efa..000000000
--- a/scantext/tests/data/matched_text/tokenize_matched_text_query.txt
+++ /dev/null
@@ -1 +0,0 @@
-the MODULE_LICENSE_GPL+ foobar
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/query b/scantext/tests/data/matched_text/turkish_unicode/query
deleted file mode 100644
index 19adb4ef5..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/query
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed under the Apache License, Version 2.0
-next_label=İrəli
-
-Some stuff here
-İ license MIT
-
-next_label=İrəli
-
-
-İ license MIT
-
-Some stuff here
-Some more stuff here
-
-# Licensed under the Apache License, Version 2.0
-next_label=İrəli
-
-lİcense MİT
-
-some more
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
deleted file mode 100644
index f0ec0e607..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.RULE
+++ /dev/null
@@ -1 +0,0 @@
-İ license MIT
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
deleted file mode 100644
index 864a8c3ca..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule1.yml
+++ /dev/null
@@ -1 +0,0 @@
-license_expression: mit
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
deleted file mode 100644
index 7ca4781d2..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.RULE
+++ /dev/null
@@ -1,2 +0,0 @@
-# Licensed under the Apache License, Version 2.0
-next_label=İrəli
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
deleted file mode 100644
index a4f80f07b..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule2.yml
+++ /dev/null
@@ -1 +0,0 @@
-license_expression: apache-2.0
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
deleted file mode 100644
index 7b767dbba..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.RULE
+++ /dev/null
@@ -1 +0,0 @@
-Licensed under the Apache License, Version 2.0
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
deleted file mode 100644
index 1443a0848..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule3.yml
+++ /dev/null
@@ -1 +0,0 @@
-license_expression: proprietary-license
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
deleted file mode 100644
index d00dc0e28..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.RULE
+++ /dev/null
@@ -1 +0,0 @@
-lİcense MİT
\ No newline at end of file
diff --git a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml b/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
deleted file mode 100644
index 864a8c3ca..000000000
--- a/scantext/tests/data/matched_text/turkish_unicode/rules/rule4.yml
+++ /dev/null
@@ -1 +0,0 @@
-license_expression: mit
diff --git a/scantext/tests/data/matched_text/unicode_text/main3.js b/scantext/tests/data/matched_text/unicode_text/main3.js
deleted file mode 100644
index f0ec0e607..000000000
--- a/scantext/tests/data/matched_text/unicode_text/main3.js
+++ /dev/null
@@ -1 +0,0 @@
-İ license MIT
\ No newline at end of file
diff --git a/scantext/tests/test_match_text.py b/scantext/tests/test_match_text.py
deleted file mode 100644
index 2f46273fd..000000000
--- a/scantext/tests/test_match_text.py
+++ /dev/null
@@ -1,1490 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) nexB Inc. and others. All rights reserved.
-# ScanCode is a trademark of nexB Inc.
-# SPDX-License-Identifier: Apache-2.0
-# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
-# See https://github.com/nexB/scancode-toolkit for support or download.
-# See https://aboutcode.org for more information about nexB OSS projects.
-#
-import os
-
-from commoncode.testcase import FileBasedTesting
-from licensedcode import cache
-from licensedcode import index
-from licensedcode import models
-from licensedcode.spans import Span
-
-from scantext.match_text import Token
-from scantext.match_text import get_full_matched_text
-from scantext.match_text import reportable_tokens
-from scantext.match_text import tokenize_matched_text
-
-TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
-
-
-class TestCollectLicenseMatchTexts(FileBasedTesting):
-    test_data_dir = TEST_DATA_DIR
-
-    def test_get_full_matched_text_base(self):
-        rule_text = """
-            Copyright [[some copyright]]
-            THIS IS FROM [[THE CODEHAUS]] AND CONTRIBUTORS
-            IN NO EVENT SHALL [[THE CODEHAUS]] OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
-        """
-
-        rule = models.Rule(stored_text=rule_text, license_expression="test")
-        idx = index.LicenseIndex([rule])
-
-        querys = """
-            foobar 45 . Copyright 2003 (C) James. All Rights Reserved.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC dasdasda .
-        """
-        result = idx.match(query_string=querys)
-        assert len(result) == 1
-        match = result[0]
-
-        # Note that there is a trailing space in that string
-        expected = """Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
-        matched_text = "".join(
-            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
-        )
-        assert matched_text == expected
-
-        expected_nh = """Copyright 2003 (C) James. All Rights Reserved.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
-        matched_text_nh = "".join(
-            get_full_matched_text(
-                match, query_string=querys, idx=idx, _usecache=False, highlight=False
-            )
-        )
-        assert matched_text_nh == expected_nh
-
-        expected_origin_text = """Copyright 2003 (C) James. All Rights Reserved.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
-        origin_matched_text = "".join(
-            get_full_matched_text(
-                match,
-                query_string=querys,
-                idx=idx,
-                highlight_not_matched="{}",
-            )
-        )
-        assert origin_matched_text == expected_origin_text
-
-    def test_get_full_matched_text(self):
-        rule_text = """
-            Copyright [[some copyright]]
-            THIS IS FROM [[THE CODEHAUS]] AND CONTRIBUTORS
-            IN NO EVENT SHALL [[THE CODEHAUS]] OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE [[POSSIBILITY OF SUCH]] DAMAGE
-        """
-
-        rule = models.Rule(stored_text=rule_text, license_expression="test")
-        idx = index.LicenseIndex([rule])
-
-        querys = """
-            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC
-        """
-        result = idx.match(query_string=querys)
-        assert len(result) == 1
-        match = result[0]
-
-        # Note that there is a trailing space in that string
-        expected = """Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
-
-        matched_text = "".join(
-            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
-        )
-        assert matched_text == expected
-
-        # the text is finally rstripped
-        matched_text = match.matched_text(_usecache=False)
-        assert matched_text == expected.rstrip()
-
-        # test again using some HTML with tags
-        # Note that there is a trailing space in that string
-        expected = """Copyright <br>2003</br> (<br>C</br>) <br>James</br>. <br>All</br> <br>Rights</br> <br>Reserved</br>.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE <br>best</br> CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """
-        matched_text = "".join(
-            get_full_matched_text(
-                match,
-                query_string=querys,
-                idx=idx,
-                highlight_not_matched="<br>{}</br>",
-                _usecache=False,
-            )
-        )
-        assert matched_text == expected
-
-        # test again using whole_lines
-        expected = """            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
-            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
-            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
-            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC\n"""
-        matched_text = "".join(
-            get_full_matched_text(
-                match,
-                query_string=querys,
-                idx=idx,
-                highlight_not_matched="{}",
-                whole_lines=True,
-            )
-        )
-        assert matched_text == expected
-
-    def test_get_full_matched_text_does_not_munge_underscore(self):
-        rule_text = "MODULE_LICENSE_GPL"
-
-        rule = models.Rule(stored_text=rule_text, license_expression="test")
-        idx = index.LicenseIndex([rule])
-
-        querys = "MODULE_LICENSE_GPL"
-        result = idx.match(query_string=querys)
-        assert len(result) == 1
-        match = result[0]
-
-        expected = "MODULE_LICENSE_GPL"
-        matched_text = "".join(
-            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
-        )
-        assert matched_text == expected
-
-    def test_get_full_matched_text_does_not_munge_plus(self):
-        rule_text = "MODULE_LICENSE_GPL+ +"
-
-        rule = models.Rule(stored_text=rule_text, license_expression="test")
-        idx = index.LicenseIndex([rule])
-
-        querys = "MODULE_LICENSE_GPL+ +"
-        result = idx.match(query_string=querys)
-        assert len(result) == 1
-        match = result[0]
-
-        expected = "MODULE_LICENSE_GPL+ +\n"
-        matched_text = "".join(
-            get_full_matched_text(match, query_string=querys, idx=idx, _usecache=False)
-        )
-        assert matched_text == expected
-
-    def test_tokenize_matched_text_does_cache_last_call_from_query_string_and_location(
-        self,
-    ):
-        dictionary = {"module": 0, "license": 1, "gpl+": 2}
-        location = None
-        query_string = "the MODULE_LICENSE_GPL+ foobar"
-        result1 = tokenize_matched_text(location, query_string, dictionary)
-        result2 = tokenize_matched_text(location, query_string, dictionary)
-        assert result2 is result1
-
-        location = self.get_test_loc("matched_text/tokenize_matched_text_query.txt")
-        query_string = None
-        result3 = tokenize_matched_text(location, query_string, dictionary)
-        assert result3 is not result2
-        assert result3 == result2
-
-        result4 = tokenize_matched_text(location, query_string, dictionary)
-        assert result4 is result3
-
-    def test_tokenize_matched_text_does_return_correct_tokens(self):
-        querys = """
-            foobar 45 Copyright 2003 (C) James. All Rights Reserved.  THIS
-            IS FROM THE CODEHAUS AND CONTRIBUTORS
-        """
-        dictionary = dict(
-            this=0, event=1, possibility=2, reserved=3, liable=5, copyright=6
-        )
-        result = tokenize_matched_text(
-            location=None, query_string=querys, dictionary=dictionary
-        )
-        expected = [
-            Token(
-                value="\n",
-                line_num=1,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="            ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="foobar",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="45",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Copyright",
-                line_num=2,
-                pos=0,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="2003",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" (",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="C",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=") ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="James",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=". ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="All",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Rights",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Reserved",
-                line_num=2,
-                pos=1,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value=".  ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="THIS",
-                line_num=2,
-                pos=2,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value="\n",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="            ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="IS",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="FROM",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="THE",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="CODEHAUS",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="AND",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="CONTRIBUTORS",
-                line_num=3,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="\n",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="        \n",
-                line_num=4,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-        ]
-
-        assert result == expected
-
-    def test_tokenize_matched_text_does_not_crash_on_turkish_unicode(self):
-        querys = "İrəli"
-        result = tokenize_matched_text(
-            location=None, query_string=querys, dictionary={}
-        )
-
-        expected = [
-            Token(
-                value="i",
-                line_num=1,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="rəli",
-                line_num=1,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="\n",
-                line_num=1,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-        ]
-        assert result == expected
-
-    def test_tokenize_matched_text_behaves_like_query_tokenizer_on_turkish_unicode(
-        self,
-    ):
-        from licensedcode.tokenize import query_tokenizer
-
-        querys = "İrəli"
-        matched_text_result = tokenize_matched_text(
-            location=None, query_string=querys, dictionary={}
-        )
-        matched_text_result = [t.value for t in matched_text_result]
-        query_tokenizer_result = list(query_tokenizer(querys))
-
-        if matched_text_result[-1] == "\n":
-            matched_text_result = matched_text_result[:-1]
-
-        assert matched_text_result == query_tokenizer_result
-
-    def test_reportable_tokens_filter_tokens_does_not_strip_last_token_value(self):
-        tokens = [
-            Token(
-                value="\n",
-                line_num=1,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="            ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="foobar",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="45",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Copyright",
-                line_num=2,
-                pos=0,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="2003",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" (",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="C",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=") ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="James",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=". ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="All",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Rights",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Reserved",
-                line_num=2,
-                pos=1,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value=".  ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="THIS",
-                line_num=2,
-                pos=2,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value="\n",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="            ",
-                line_num=3,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-        ]
-
-        match_qspan = Span(0, 1)
-        result = list(
-            reportable_tokens(
-                tokens, match_qspan, start_line=1, end_line=2, whole_lines=False
-            )
-        )
-        expected = [
-            Token(
-                value="Copyright",
-                line_num=2,
-                pos=0,
-                is_text=True,
-                is_matched=True,
-                is_known=True,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="2003",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" (",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="C",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=") ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="James",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=". ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="All",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Rights",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Reserved",
-                line_num=2,
-                pos=1,
-                is_text=True,
-                is_matched=True,
-                is_known=True,
-            ),
-            Token(
-                value=".  ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-        ]
-
-        assert result == expected
-
-        # test again with whole lines
-        match_qspan = Span(0, 1)
-        result = list(
-            reportable_tokens(
-                tokens, match_qspan, start_line=1, end_line=2, whole_lines=True
-            )
-        )
-        expected = [
-            Token(
-                value="\n",
-                line_num=1,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="            ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="foobar",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="45",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Copyright",
-                line_num=2,
-                pos=0,
-                is_text=True,
-                is_matched=True,
-                is_known=True,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="2003",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" (",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="C",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=") ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="James",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=". ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="All",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Rights",
-                line_num=2,
-                pos=-1,
-                is_text=True,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value=" ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="Reserved",
-                line_num=2,
-                pos=1,
-                is_text=True,
-                is_matched=True,
-                is_known=True,
-            ),
-            Token(
-                value=".  ",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-            Token(
-                value="THIS",
-                line_num=2,
-                pos=2,
-                is_text=True,
-                is_matched=False,
-                is_known=True,
-            ),
-            Token(
-                value="\n",
-                line_num=2,
-                pos=-1,
-                is_text=False,
-                is_matched=False,
-                is_known=False,
-            ),
-        ]
-
-        assert result == expected
-
-    def test_matched_text_is_collected_correctly_end2end(self):
-        rules_data_dir = self.get_test_loc("matched_text/index/rules")
-        query_location = self.get_test_loc("matched_text/query.txt")
-        rules = models.load_rules(rules_data_dir)
-        idx = index.LicenseIndex(rules)
-
-        results = [
-            match.matched_text(_usecache=False)
-            for match in idx.match(location=query_location)
-        ]
-        expected = [
-            "This source code is licensed under both the Apache 2.0 license "
-            "(found in the\n#  LICENSE",
-            "This source code is licensed under [both] [the] [Apache] [2].[0] license "
-            "(found in the\n#  LICENSE file in the root directory of this source tree)",
-            "GPLv2 (",
-        ]
-        assert results == expected
-
-    def check_matched_texts(self, test_loc, expected_texts, whole_lines=True):
-        idx = cache.get_index()
-        test_loc = self.get_test_loc(test_loc)
-        matches = idx.match(location=test_loc)
-        matched_texts = [
-            m.matched_text(whole_lines=whole_lines, highlight=False, _usecache=False)
-            for m in matches
-        ]
-        assert matched_texts == expected_texts
-
-    def test_matched_text_is_collected_correctly_end2end_for_spdx_match_whole_lines(
-        self,
-    ):
-        self.check_matched_texts(
-            test_loc="matched_text/spdx/query.txt",
-            expected_texts=["@REM # SPDX-License-Identifier: BSD-2-Clause-Patent"],
-            whole_lines=True,
-        )
-
-    def test_matched_text_is_collected_correctly_end2end_for_spdx_match_plain(self):
-        self.check_matched_texts(
-            test_loc="matched_text/spdx/query.txt",
-            expected_texts=["SPDX-License-Identifier: BSD-2-Clause-Patent"],
-            whole_lines=False,
-        )
-
-    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_query(
-        self,
-    ):
-        idx = cache.get_index()
-        querys_with_diacritic_unicode = "İ license MIT"
-        result = idx.match(query_string=querys_with_diacritic_unicode)
-        assert len(result) == 1
-        match = result[0]
-        expected = "license MIT"
-        matched_text = match.matched_text(
-            _usecache=False,
-        )
-        assert matched_text == expected
-
-    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_file(self):
-        idx = cache.get_index()
-        file_with_diacritic_unicode_location = self.get_test_loc(
-            "matched_text/unicode_text/main3.js"
-        )
-        result = idx.match(location=file_with_diacritic_unicode_location)
-        assert len(result) == 1
-        match = result[0]
-        expected = "license MIT"
-        matched_text = match.matched_text(_usecache=False)
-        assert matched_text == expected
-
-    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_query_whole_lines(
-        self,
-    ):
-        idx = cache.get_index()
-        querys_with_diacritic_unicode = "İ license MIT"
-        result = idx.match(query_string=querys_with_diacritic_unicode)
-        assert len(result) == 1
-        match = result[0]
-        expected = "[İ] license MIT"
-        matched_text = match.matched_text(_usecache=False, whole_lines=True)
-        assert matched_text == expected
-
-    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_with_diacritic_in_rules(
-        self,
-    ):
-        rule_dir = self.get_test_loc("matched_text/turkish_unicode/rules")
-        idx = index.LicenseIndex(models.load_rules(rule_dir))
-        query_loc = self.get_test_loc("matched_text/turkish_unicode/query")
-        matches = idx.match(location=query_loc)
-        matched_texts = [
-            m.matched_text(whole_lines=False, highlight=False, _usecache=False)
-            for m in matches
-        ]
-
-        expected = [
-            "Licensed under the Apache License, Version 2.0\r\nnext_label=irəli",
-            "İ license MIT",
-            "İ license MIT",
-            "Licensed under the Apache License, Version 2.0\r\nnext_label=irəli",
-            "lİcense mit",
-        ]
-
-        assert matched_texts == expected
-
-    def test_matched_text_is_not_truncated_with_unicode_diacritic_input_and_full_index(
-        self,
-    ):
-        expected = [
-            "Licensed under the Apache License, Version 2.0",
-            "license MIT",
-            "license MIT",
-            "Licensed under the Apache License, Version 2.0",
-        ]
-
-        self.check_matched_texts(
-            test_loc="matched_text/turkish_unicode/query",
-            expected_texts=expected,
-            whole_lines=False,
-        )
-
-    def test_matched_text_does_not_ignores_whole_lines_in_binary_with_small_index(self):
-        rule_dir = self.get_test_loc("matched_text/binary_text/rules")
-        idx = index.LicenseIndex(models.load_rules(rule_dir))
-        query_loc = self.get_test_loc("matched_text/binary_text/gosu")
-        matches = idx.match(location=query_loc)
-        matched_texts = [
-            m.matched_text(whole_lines=True, highlight=False, _usecache=False)
-            for m in matches
-        ]
-
-        expected = [
-            "{{ .Self }} license: GPL-3 (full text at https://github.com/tianon/gosu)"
-        ]
-
-        assert matched_texts == expected
-
-    def test_matched_text_does_not_ignores_whole_lines_in_binary_against_full_index(
-        self,
-    ):
-        expected = [
-            "{{ .Self }} license: GPL-3 (full text at https://github.com/tianon/gosu)"
-        ]
-        self.check_matched_texts(
-            test_loc="matched_text/binary_text/gosu",
-            expected_texts=expected,
-            whole_lines=True,
-        )
-
-    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_windows_whole_lines(
-        self,
-    ):
-        expected_texts = [
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "%sconfiguration: --enable-gpl --enable-version3 --enable-dxva2 "
-            "--enable-libmfx --enable-nvenc --enable-avisynth --enable-bzlib "
-            "--enable-fontconfig --enable-frei0r --enable-gnutls --enable-iconv "
-            "--enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca "
-            "--enable-libfreetype --enable-libgme --enable-libgsm --enable-libilbc "
-            "--enable-libmodplug --enable-libmp3lame --enable-libopencore-amrnb "
-            "--enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg "
-            "--enable-libopus --enable-librtmp --enable-libsnappy --enable-libsoxr "
-            "--enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab "
-            "--enable-libvo-amrwbenc --enable-libvorbis --enable-libvpx "
-            "--enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 "
-            "--enable-libxavs --enable-libxvid --enable-libzimg --enable-lzma "
-            "--enable-decklink --enable-zlib",
-            "%s is free software; you can redistribute it and/or modify\n"
-            "it under the terms of the GNU General Public License as published by\n"
-            "the Free Software Foundation; either version 3 of the License, or\n"
-            "(at your option) any later version.\n"
-            "%s is distributed in the hope that it will be useful,\n"
-            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
-            "GNU General Public License for more details.\n"
-            "You should have received a copy of the GNU General Public License\n"
-            "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n"
-            "File formats:\n"
-            "D. = Demuxing supported\n"
-            ".E = Muxing supported\n"
-            "%s%s %-15s %s\n"
-            "Devices:\n"
-            "Codecs:\n"
-            "D..... = Decoding supported\n"
-            ".E.... = Encoding supported\n"
-            "..V... = Video codec\n"
-            "No option name near '%s'\n"
-            "Unable to parse '%s': %s\n"
-            "Setting '%s' to value '%s'\n"
-            "Option '%s' not found\n"
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libavfilter license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libavformat license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libavcodec license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libpostproc license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libswresample license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libswscale license: GPL version 3 or later",
-            "--enable-gpl --enable-version3 --enable-dxva2 --enable-libmfx --enable-nvenc "
-            "--enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r "
-            "--enable-gnutls --enable-iconv --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme "
-            "--enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 "
-            "--enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame "
-            "--enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 "
-            "--enable-libx265 --enable-libxavs --enable-libxvid --enable-libzimg "
-            "--enable-lzma --enable-decklink --enable-zlib",
-            "libavutil license: GPL version 3 or later",
-            "This software is derived from the GNU GPL XviD codec (1.3.0).",
-        ]
-
-        self.check_matched_texts(
-            test_loc="matched_text/ffmpeg/ffmpeg.exe",
-            expected_texts=expected_texts,
-            whole_lines=True,
-        )
-
-    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_windows_not_whole_lines(
-        self,
-    ):
-        expected_texts = [
-            "enable-gpl --enable-version3 --",
-            "enable-gpl --enable-version3 --",
-            "is free software; you can redistribute it and/or modify\n"
-            "it under the terms of the GNU General Public License as published by\n"
-            "the Free Software Foundation; either version 3 of the License, or\n"
-            "(at your option) any later version.\n"
-            "%s is distributed in the hope that it will be useful,\n"
-            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
-            "GNU General Public License for more details.\n"
-            "You should have received a copy of the GNU General Public License\n"
-            "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n"
-            "File formats:\n"
-            "D. = Demuxing supported\n"
-            ".E = Muxing supported\n"
-            "%s%s %-15s %s\n"
-            "Devices:\n"
-            "Codecs:\n"
-            "D..... = Decoding supported\n"
-            ".E.... = Encoding supported\n"
-            "..V... = Video codec\n"
-            "No option name near '%s'\n"
-            "Unable to parse '%s': %s\n"
-            "Setting '%s' to value '%s'\n"
-            "Option '%s' not found\n"
-            "--enable-gpl --",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "enable-gpl --enable-version3 --",
-            "license: GPL version 3 or later",
-            "This software is derived from the GNU GPL XviD codec (",
-        ]
-
-        self.check_matched_texts(
-            test_loc="matched_text/ffmpeg/ffmpeg.exe",
-            expected_texts=expected_texts,
-            whole_lines=False,
-        )
-
-    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_elf_whole_lines(self):
-        expected_texts = [
-            "--prefix=/usr --extra-version=0ubuntu0.1 --build-suffix=-ffmpeg "
-            "--toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu "
-            "--incdir=/usr/include/x86_64-linux-gnu --cc=cc --cxx=g++ --enable-gpl "
-            "--enable-shared --disable-stripping --disable-decoder=libopenjpeg "
-            "--disable-decoder=libschroedinger --enable-avresample --enable-avisynth "
-            "--enable-gnutls --enable-ladspa --enable-libass --enable-libbluray "
-            "--enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite "
-            "--enable-libfontconfig --enable-libfreetype --enable-libfribidi "
-            "--enable-libgme --enable-libgsm --enable-libmodplug --enable-libmp3lame "
-            "--enable-libopenjpeg --enable-libopus --enable-libpulse --enable-librtmp "
-            "--enable-libschroedinger --enable-libshine --enable-libsnappy "
-            "--enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora "
-            "--enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack "
-            "--enable-libwebp --enable-libx265 --enable-libxvid --enable-libzvbi "
-            "--enable-openal --enable-opengl --enable-x11grab --enable-libdc1394 "
-            "--enable-libiec61883 --enable-libzmq --enable-frei0r --enable-libx264 "
-            "--enable-libopencv",
-            "%sconfiguration: --prefix=/usr --extra-version=0ubuntu0.1 "
-            "--build-suffix=-ffmpeg --toolchain=hardened "
-            "--libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu "
-            "--cc=cc --cxx=g++ --enable-gpl --enable-shared --disable-stripping "
-            "--disable-decoder=libopenjpeg --disable-decoder=libschroedinger "
-            "--enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa "
-            "--enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca "
-            "--enable-libcdio --enable-libflite --enable-libfontconfig "
-            "--enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm "
-            "--enable-libmodplug --enable-libmp3lame --enable-libopenjpeg "
-            "--enable-libopus --enable-libpulse --enable-librtmp --enable-libschroedinger "
-            "--enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex "
-            "--enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis "
-            "--enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 "
-            "--enable-libxvid --enable-libzvbi --enable-openal --enable-opengl "
-            "--enable-x11grab --enable-libdc1394 --enable-libiec61883 --enable-libzmq "
-            "--enable-frei0r --enable-libx264 --enable-libopencv",
-            "%s is free software; you can redistribute it and/or modify\n"
-            "it under the terms of the GNU General Public License as published by\n"
-            "the Free Software Foundation; either version 2 of the License, or\n"
-            "(at your option) any later version.\n"
-            "%s is distributed in the hope that it will be useful,\n"
-            "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-            "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
-            "GNU General Public License for more details.\n"
-            "You should have received a copy of the GNU General Public License\n"
-            "along with %s; if not, write to the Free Software\n"
-            "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA",
-        ]
-
-        self.check_matched_texts(
-            test_loc="matched_text/ffmpeg/ffmpeg",
-            expected_texts=expected_texts,
-            whole_lines=True,
-        )
-
-    def test_matched_text_is_collected_correctly_in_binary_ffmpeg_static_whole_lines(
-        self,
-    ):
-        expected_texts = ["libswresample license: LGPL version 2.1 or later"]
-        self.check_matched_texts(
-            test_loc="matched_text/ffmpeg/libavsample.lib",
-            expected_texts=expected_texts,
-            whole_lines=True,
-        )
diff --git a/scantext/tests/test_views.py b/scantext/tests/test_views.py
index 8b4fe938c..a716fa89f 100644
--- a/scantext/tests/test_views.py
+++ b/scantext/tests/test_views.py
@@ -19,6 +19,10 @@
 
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
 
+SCANCODE_BASE_URL = (
+    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
+)
+
 
 class TestScantextViews(FileBasedTesting):
     test_data_dir = TEST_DATA_DIR
@@ -67,6 +71,6 @@ def test_get_rule_text_url__with_default_base_url(self):
         rule1 = models.Rule(license_expression="apache-2.0 or mit", stored_text="1")
         rule1.identifier = "apache-2.0_or_mit_48.RULE"
         result = get_rule_text_url(rule=rule1)
-        expected = "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_or_mit_48.RULE"
+        expected = SCANCODE_BASE_URL + "/rules/apache-2.0_or_mit_48.RULE"
 
         assert result == expected
diff --git a/scantext/views.py b/scantext/views.py
index 265ac9167..7b8ef1cec 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -208,8 +208,8 @@ def get_licenses(
 
 def get_percentage_of_license_text(query, matches):
     """
-    Return percentage of license text matched in ``query`` Query by a list of ``matches``
-    percentage is a float between 0 and 100.
+    Return percentage of license text matched in ``query`` Query by
+    a list of ``matches`` percentage is a float between 0 and 100.
     """
 
     # TODO: percentage of license text should be done by scancode-toolkit.
@@ -274,7 +274,9 @@ def build_colors(matches_by_id):
     .matched3 {background-color: rgba(220, 90, 30, 0.3);}
     """
     return [
-        f".matched{mid} {{background-color: rgba({(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.3);}}"
+        f""".matched{mid} {{background-color: rgba(
+        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255},
+        0.3);}}"""
         for mid in matches_by_id
     ]
 

From 57a1d62075d4d2c43fdfbdc5c22a8f58f55b3fb5 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sun, 18 Sep 2022 16:58:56 +0530
Subject: [PATCH 48/59] Indent html, handle input, fix modules & tests #450

* Indent html code to 2 spaces
* Create temp directories for scanning
* Move all non-views to `match_text.py`
* Correct existing tests to match the rest of the app
* Remove debug code

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        | 326 ++++++++++++++----
 .../includes/license_detail_modal.html        |  98 +++---
 .../scantext/includes/license_report.html     |   4 +-
 .../includes/license_summary_detail.html      | 123 +++----
 .../includes/license_summary_header.html      |  42 +--
 .../templates/scantext/license_scan_form.html |  64 ++--
 .../templates/scantext/license_summary.html   |  59 ++--
 scantext/tests/test_views.py                  |  47 +--
 scantext/views.py                             | 290 +---------------
 9 files changed, 490 insertions(+), 563 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 9e5db2dff..615fe3fb6 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -1,62 +1,38 @@
+# SPDX-License-Identifier: Apache-2.0
 #
-# Copyright (c) nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
 # ScanCode is a trademark of nexB Inc.
-# SPDX-License-Identifier: Apache-2.0
-# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
-# See https://github.com/nexB/scancode-toolkit for support or download.
-# See https://aboutcode.org for more information about nexB OSS projects.
 #
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
 
 import attr
+from licensedcode import models
 from licensedcode import query
 from licensedcode.spans import Span
 from licensedcode.stopwords import STOPWORDS
 from licensedcode.tokenize import index_tokenizer
 from licensedcode.tokenize import matched_query_text_tokenizer
 
-TRACE = False
-TRACE_MATCHED_TEXT = False
-TRACE_MATCHED_TEXT_DETAILS = False
-
-
-def logger_debug(*args):
-    pass
-
-
-if TRACE or TRACE_MATCHED_TEXT or TRACE_MATCHED_TEXT_DETAILS:
-
-    use_print = True
-    if use_print:
-        prn = print
-    else:
-        import logging
-        import sys
-
-        logger = logging.getLogger(__name__)
-        # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
-        logging.basicConfig(stream=sys.stdout)
-        logger.setLevel(logging.DEBUG)
-        prn = logger.debug
-
-    def logger_debug(*args):
-        return prn(" ".join(isinstance(a, str) and a or repr(a) for a in args))
-
-    def _debug_print_matched_query_text(match, extras=5):
-        """
-        Print a matched query text including `extras` tokens before and after
-        the match. Used for debugging license matches.
-        """
-        # Create a fake new match with extra tokens before and after
-        new_match = match.combine(match)
-        new_qstart = max([0, match.qstart - extras])
-        new_qend = min([match.qend + extras, len(match.query.tokens)])
-        new_qspan = Span(new_qstart, new_qend)
-        new_match.qspan = new_qspan
-
-        logger_debug(new_match)
-        logger_debug(" MATCHED QUERY TEXT with extras")
-        qt = new_match.matched_text(whole_lines=False)
-        logger_debug(qt)
+SCANCODE_BASE_URL = (
+    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
+)
+SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
+SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
 
 
 @attr.s(slots=True)
@@ -103,13 +79,168 @@ class Token:
     match_ids = attr.ib(attr.Factory(list))
 
 
-def tokenize_matched_text(
-    location,
-    query_string,
-    dictionary,
-    start_line=1,
-    trace=TRACE_MATCHED_TEXT_DETAILS,
-):
+def get_match_details(mid, match, license_url_template, spdx_license_url):
+    """
+    Return a mapping of license data built from a LicenseMatch ``match``.
+    """
+    from licensedcode import cache
+
+    licenses = cache.get_licenses_db()
+
+    # TODO: decide whether the text should be highlighted or not.
+    matched_text = match.matched_text(whole_lines=False, highlight=False)
+
+    SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
+    SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
+
+    result = {}
+
+    result["mid"] = mid
+    # Detection Level Information
+    result["score"] = int(match.score())
+    result["start_line"] = match.start_line
+    result["end_line"] = match.end_line
+    result["matched_length"] = match.len()
+    result["match_coverage"] = match.coverage()
+    result["matcher"] = match.matcher
+
+    # LicenseDB Level Information (Rule that was matched)
+    result["license_expression"] = match.rule.license_expression
+    result["rule_text_url"] = get_rule_text_url(match.rule)
+    result["rule_identifier"] = match.rule.identifier
+    result["referenced_filenames"] = match.rule.referenced_filenames
+    result["is_license_text"] = match.rule.is_license_text
+    result["is_license_notice"] = match.rule.is_license_notice
+    result["is_license_reference"] = match.rule.is_license_reference
+    result["is_license_tag"] = match.rule.is_license_tag
+    result["is_license_intro"] = match.rule.is_license_intro
+    result["rule_length"] = match.rule.length
+    result["rule_relevance"] = match.rule.relevance
+    result["matched_text"] = matched_text
+
+    # License Level Information (Individual licenses that this rule refers to)
+    result["licenses"] = detected_licenses = []
+    for license_key in match.rule.license_keys():
+        detected_license = {}
+        detected_licenses.append(detected_license)
+
+        lic = licenses.get(license_key)
+
+        detected_license["key"] = lic.key
+        detected_license["name"] = lic.name
+        detected_license["short_name"] = lic.short_name
+        detected_license["category"] = lic.category
+        detected_license["is_exception"] = lic.is_exception
+        detected_license["is_unknown"] = lic.is_unknown
+        detected_license["owner"] = lic.owner
+        detected_license["homepage_url"] = lic.homepage_url
+        detected_license["text_url"] = lic.text_urls[0] if lic.text_urls else ""
+        detected_license["reference_url"] = license_url_template.format(lic.key)
+        detected_license["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(
+            lic.key
+        )
+        detected_license["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(
+            lic.key
+        )
+
+        spdx_key = lic.spdx_license_key
+        detected_license["spdx_license_key"] = spdx_key
+
+        if spdx_key:
+            is_license_ref = spdx_key.lower().startswith("licenseref-")
+            if is_license_ref:
+                spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
+            else:
+                # TODO: Is this replacing spdx_key???
+                spdx_key = lic.spdx_license_key.rstrip("+")
+                spdx_url = spdx_license_url.format(spdx_key)
+        else:
+            spdx_url = ""
+        detected_license["spdx_url"] = spdx_url
+
+    return result
+
+
+def get_licenses(location, license_url_template=SCANCODE_LICENSEDB_URL, **kwargs):
+    """
+    Return a mapping of license match data from detecting license
+    in the file at ``location`` suitable for use in template.
+
+    The mapping can be empty if there are no matches.
+    """
+    from licensedcode.cache import get_index
+
+    idx = get_index()
+
+    # gets matches from a license file
+    matches = idx.match(
+        location=location,
+        unknown_licenses=True,
+        **kwargs,
+    )
+
+    if not matches:
+        return {}
+
+    query = matches[0].query
+
+    # Assign a numeric id to every match.
+    matches_by_id = dict(enumerate(matches))
+
+    del matches
+
+    license_matches = []
+
+    for mid, match in matches_by_id.items():
+        license_matches.append(
+            get_match_details(
+                mid=mid,
+                match=match,
+                license_url_template=license_url_template,
+                spdx_license_url=SPDX_LICENSE_URL,
+            )
+        )
+
+    license_tokens = get_license_tokens(
+        query=query,
+        matches_by_id=matches_by_id,
+        stopwords=STOPWORDS,
+    )
+
+    match_colors = get_build_colors(matches_by_id=matches_by_id)
+
+    return {
+        "license_matches": license_matches,
+        "license_tokens": license_tokens,
+        "match_colors": match_colors,
+        "license_keys_count": get_license_keys_count(matches=matches_by_id.values()),
+        "percentage_of_license_text": get_percentage_of_license_text(
+            query=query, matches=matches_by_id.values()
+        ),
+    }
+
+
+def get_license_tokens(query, matches_by_id, stopwords=STOPWORDS):
+    """
+    Return a list of Token from ``query`` tagged with ids from ``matches_by_id``.
+    """
+    # Token(value="", pos=3, is_text=True, is_matched=True, match_ids=[mid, mid, mid])
+    tokens = list(
+        tokenize_matched_text(
+            location=query.location,
+            query_string=query.query_string,
+            dictionary=query.idx.dictionary,
+            start_line=query.start_line,
+        )
+    )
+
+    for mid, match in matches_by_id.items():
+        tag_matched_tokens(tokens=tokens, match_qspan=match.qspan, mid=mid)
+
+    return tokens
+
+
+def tokenize_matched_text(location, query_string, dictionary, start_line=1):
     """
     Yield Token objects with pos and line number collected from the file at
     `location` or the `query_string` string. `dictionary` is the index mapping
@@ -123,15 +254,7 @@ def tokenize_matched_text(
         start_line=start_line,
     )
     for line_num, line in qls:
-        if trace:
-            logger_debug(
-                "  tokenize_matched_text:", "line_num:", line_num, "line:", line
-            )
-
         for is_text, token_str in matched_query_text_tokenizer(line):
-            if trace:
-                logger_debug("     is_text:", is_text, "token_str:", repr(token_str))
-
             # Determine if a token is is_known in the license index or not. This
             # is essential as we need to realign the query-time tokenization
             # with the full text to report proper matches.
@@ -196,3 +319,80 @@ def tokenize_matched_text(
                     is_known=False,
                     pos=-1,
                 )
+
+
+def tag_matched_tokens(tokens, match_qspan, mid):
+    """
+    Tag in place each token of ``tokens`` found in ``match_qspan`` with the
+    ``mid`` match id, as well as the non-text token following a matched token.
+    """
+    previous_is_matched = False
+    for tok in tokens:
+        if previous_is_matched and not tok.is_text:
+            tok.match_ids.append(mid)
+            tok.is_matched = True  # set in place: attr.evolve() returns a copy
+            previous_is_matched = False
+        elif tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
+            tok.match_ids.append(mid)
+            tok.is_matched = True  # set in place: attr.evolve() returns a copy
+            previous_is_matched = True
+
+
+def get_build_colors(matches_by_id):
+    """
+    Return a list of CSS rule strings, one ``.matched<mid>`` rule per match id.
+
+    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
+    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
+    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
+    """
+    return [
+        f""".matched{mid} {{background-color: rgba(
+        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255},
+        0.3);}}"""
+        for mid in matches_by_id
+    ]
+
+
+def get_percentage_of_license_text(query, matches):
+    """
+    Return the percentage of the ``query`` Query tokens matched by a list of
+    ``matches``, as a number between 0 and 100 rounded to two decimals.
+    """
+
+    # TODO: percentage of license text should be done by scancode-toolkit.
+    if not matches:
+        return 0
+
+    qspans = (match.qspan for match in matches)
+
+    matched_tokens_length = len(Span().union(*qspans))
+    query_tokens_length = query.tokens_length(with_unknown=True)
+    return round((matched_tokens_length / query_tokens_length) * 100, 2)
+
+
+def get_rule_text_url(rule, base_url=SCANCODE_BASE_URL):
+    """
+    Return a URL to the text file of a ``rule`` Rule.
+    Return None if there is no URL for the ``rule``.
+    """
+
+    if isinstance(rule, (models.SpdxRule, models.UnknownRule)):
+        return
+
+    if rule.is_from_license:
+        return f"{base_url}/licenses/{rule.identifier}"
+
+    else:
+        return f"{base_url}/rules/{rule.identifier}"
+
+
+def get_license_keys_count(matches):
+    """
+    Return the number of unique license keys found in a list of license matches.
+    """
+    keys = set()
+    for match in matches:
+        keys.update(match.rule.license_keys())
+
+    return len(keys)
diff --git a/scantext/templates/scantext/includes/license_detail_modal.html b/scantext/templates/scantext/includes/license_detail_modal.html
index 5a269fcd4..b85171ce5 100644
--- a/scantext/templates/scantext/includes/license_detail_modal.html
+++ b/scantext/templates/scantext/includes/license_detail_modal.html
@@ -7,55 +7,55 @@
     </header>
     <section class="modal-card-body is-4by4">
       <table class="table is-striped is-hoverable is-fullwidth is-size-6">
-            <tbody>
-                <tr>
-                    <td><strong>Score</strong></td>
-                    <td>{{ license.score }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Matched Line(s)</strong></td>
-                    <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-                </tr>
-                <tr>
-                    <td><strong>Rule Identifier</strong></td>
-                    <td>
-                    {% if license.rule_text_url %}
-                        <a href="{{ license.rule_text_url }}" target="_blank">{{ license.rule_identifier }}</a>
-                    {% else %}
-                        {{ license.rule_identifier }}
-                    {% endif %}
-                    </td>
-                </tr>
-                <tr>
-                    <td><strong>Matcher</strong></td>
-                    <td>{{ license.matcher }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Match Coverage</strong></td>
-                    <td>{{ license.match_coverage }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Matched Length</strong></td>
-                    <td>{{ license.matched_length }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Key(s)</strong></td>
-                    <td>
-                    {% for key in license.licenses %}
-                        <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
-                    {% endfor %}
-                    </td>
-                </tr>
-                <tr>
-                    <td><strong>Rule Relevance</strong></td>
-                    <td>{{ license.rule_relevance }}</td>
-                </tr>
-                <tr>
-                    <td><strong>Rule Length</strong></td>
-                    <td>{{ license.rule_length }}</td>
-                </tr>
-            </tbody>
-        </table>
+        <tbody>
+          <tr>
+            <td><strong>Score</strong></td>
+            <td>{{ license.score }}</td>
+          </tr>
+          <tr>
+            <td><strong>Matched Line(s)</strong></td>
+            <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+          </tr>
+          <tr>
+            <td><strong>Rule Identifier</strong></td>
+            <td>
+            {% if license.rule_text_url %}
+                <a href="{{ license.rule_text_url }}" target="_blank">{{ license.rule_identifier }}</a>
+            {% else %}
+                {{ license.rule_identifier }}
+            {% endif %}
+            </td>
+          </tr>
+          <tr>
+            <td><strong>Matcher</strong></td>
+            <td>{{ license.matcher }}</td>
+          </tr>
+          <tr>
+            <td><strong>Match Coverage</strong></td>
+            <td>{{ license.match_coverage }}</td>
+          </tr>
+          <tr>
+            <td><strong>Matched Length</strong></td>
+            <td>{{ license.matched_length }}</td>
+          </tr>
+          <tr>
+            <td><strong>Key(s)</strong></td>
+            <td>
+            {% for key in license.licenses %}
+                <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
+            {% endfor %}
+            </td>
+          </tr>
+          <tr>
+            <td><strong>Rule Relevance</strong></td>
+            <td>{{ license.rule_relevance }}</td>
+          </tr>
+          <tr>
+            <td><strong>Rule Length</strong></td>
+            <td>{{ license.rule_length }}</td>
+          </tr>
+        </tbody>
+      </table>
     </section>
     <footer class="modal-card-foot">
       <button class="button is-outlined  has-text-weight-semibold">
diff --git a/scantext/templates/scantext/includes/license_report.html b/scantext/templates/scantext/includes/license_report.html
index 9c11410c2..aa3c064d5 100644
--- a/scantext/templates/scantext/includes/license_report.html
+++ b/scantext/templates/scantext/includes/license_report.html
@@ -1,2 +1,2 @@
-<a class="has-text-danger" href="https://github.com/nexB/scancode.io/issues/new?labels=bug&title=License+detection+error+as+`{{ license.license_expression|pprint }}`
-&body=Detection+level+details%0A```python%0A{%0A%20%20%20%20score+:+{{ license.score }}+%0A%20%20%20%20start_line+:+{{ license.start_line }}+%0A%20%20%20%20end_line+:+{{ license.end_line }}+%0A%20%20%20%20matched_length+:+{{ license.matched_length }}+%0A%20%20%20%20match_coverage+:+{{ license.match_coverage }}+%0A%20%20%20%20rule_identifier+:+{{ license.rule_identifier }}%0A}%0A```+%0A%0AMatched+Text%0A```%0A{{ license.matched_text }}%0A```+%0A%0AInput+Text%0A```%0A{{ license.matched_text }}%0A```" target="_blank">Report on Github</a>
\ No newline at end of file
+<a class="has-text-danger" href="https://github.com/nexB/scancode-toolkit/issues/new?labels=bug&title=License+detection+error+as+`{{ license.license_expression|pprint }}`
+&body=Detection+level+details%0A```python%0A{%0A%20%20%20%20score+:+{{ license.score }}+%0A%20%20%20%20start_line+:+{{ license.start_line }}+%0A%20%20%20%20end_line+:+{{ license.end_line }}+%0A%20%20%20%20matched_length+:+{{ license.matched_length }}+%0A%20%20%20%20match_coverage+:+{{ license.match_coverage }}+%0A%20%20%20%20rule_identifier+:+{{ license.rule_identifier }}%0A}%0A```" target="_blank">Report on Github</a>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 7587b1389..9ee936055 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -1,67 +1,68 @@
 <div class="columns mb-5 mx-1">
-    <div class="column is-one-third" style="max-height: 50vh;">
-        <p class="title is-5">Detected Licenses</p>
-        <div class="panel is-light is-shadowless">
-        {% for license in detected_licenses.license_matches %}
-            <div class="panel-block licenses-card is-flex is-justify-content-space-between p-0">
-                <div class="card-header-title">{{ license.license_expression }}</div>
-                <div class="is-flex is-justify-content-row is-align-items-center">
-                    <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}</p>
-                    <div class="dropdown is-hoverable">
-                        <p class="card-header-icon">
-                            <span class="icon">
-                                <i class="fa fa-angle-down" aria-hidden="true"></i>
-                            </span>
-                        </p>
-                        <div class="dropdown-menu">
-                            <div class="dropdown-content py-0">
-                                <table class="panel-block table is-striped is-size-6 is-hoverable mb-0" style="white-space: nowrap;">
-                                    <tr>
-                                        <td><strong>License Expression</strong></td>
-                                        <td>{{ license.license_expression }}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Score</strong></td>
-                                        <td>{{ license.score }}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Matched Line(s)</strong></td>
-                                        <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Matched Length</strong></td>
-                                        <td>{{ license.matched_length }}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Matched Coverage</strong></td>
-                                        <td>{{ license.match_coverage }}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Matcher</strong></td>
-                                        <td>{{ license.matcher }}</td>
-                                    </tr>
-                                    <tr>
-                                        <td><strong>Rule Identifer</strong></td>
-                                        <td>{{ license.rule_identifier }}</td>
-                                    </tr>
-                                </table>
-                                <div class="panel-heading py-2 is-size-7 is-dark is-flex is-justify-content-space-between has-background-white">
-                                    <span class="license-details-btn has-text-link" style="cursor: pointer;">View more details</span>
-                                    {% include 'scantext/includes/license_detail_modal.html' with license=license %}
-                                    <!-- report license hyperlink -->
-                                    {% include 'scantext/includes/license_report.html' with license=license %}
-                                </div>
-                            </div>
-                        </div>
-                    </div>
+  <div class="column is-one-third">
+    <p class="title is-5">Detected Licenses</p>
+    <div class="panel is-light is-shadowless">
+    {% for license in detected_licenses.license_matches %}
+      <div class="panel-block is-flex is-justify-content-space-between p-0 matched{{ license.mid }}">
+        <div class="card-header-title"><span class="has-text-link mr-2">{{ license.mid|add:1 }}.</span> {{ license.license_expression }}</div>
+        <div class="is-flex is-justify-content-row is-align-items-center">
+          <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}%</p>
+          <div class="dropdown is-hoverable">
+            <a href="#{{ license.mid }}">
+              <p class="card-header-icon">
+                <span class="icon">
+                  <i class="fa fa-angle-down" aria-hidden="true"></i>
+                </span>
+              </p>
+            </a>
+            <div class="dropdown-menu">
+              <div class="dropdown-content py-0">
+                <table class="panel-block table is-striped is-size-6 is-hoverable mb-0" style="white-space: nowrap;">
+                  <tr>
+                    <td><strong>License Expression</strong></td>
+                    <td>{{ license.license_expression }}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Score</strong></td>
+                    <td>{{ license.score }}%</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Matched Line(s)</strong></td>
+                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Matched Length</strong></td>
+                    <td>{{ license.matched_length }}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Matched Coverage</strong></td>
+                    <td>{{ license.match_coverage }}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Matcher</strong></td>
+                    <td>{{ license.matcher }}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Rule Identifier</strong></td>
+                    <td>{{ license.rule_identifier }}</td>
+                  </tr>
+                </table>
+                <div class="panel-heading py-2 is-size-7 is-dark is-flex is-justify-content-space-between has-background-white">
+                  <span class="license-details-btn has-text-link" style="cursor: pointer;">View more details</span>
+                  {% include 'scantext/includes/license_detail_modal.html' with license=license %}
+                  <!-- report license hyperlink -->
+                  {% include 'scantext/includes/license_report.html' with license=license %}
                 </div>
+              </div>
             </div>
-            {% endfor %}
+          </div>
         </div>
+      </div>
+    {% endfor %}
     </div>
-    <div class="column is-two-third">
-        <p class="title is-5">Input Text</p>
-        <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="matched{{ token.match_ids.0 }}" 
-            title="{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
-    </div>
+  </div>
+  <div class="column is-two-third">
+    <p class="title is-5">Input Text</p>
+    <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="license-token matched{{ token.match_ids.0 }}" id="{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
+  </div>
 </div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_header.html b/scantext/templates/scantext/includes/license_summary_header.html
index b9f83fa87..d580d2cee 100644
--- a/scantext/templates/scantext/includes/license_summary_header.html
+++ b/scantext/templates/scantext/includes/license_summary_header.html
@@ -1,27 +1,27 @@
 {% load humanize %}
 <nav class="level is-mobile">
-    <div class="level-item has-text-centered">
-        <div>
-            <p class="heading">Percentage Of License Text</p>
-            <p class="title">
-              <span>{{ detected_licenses.percentage_of_license_text }}</span>
-            </p>
-        </div>
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">Percentage Of License Text</p>
+      <p class="title">
+        <span>{{ detected_licenses.percentage_of_license_text }}</span>
+      </p>
     </div>
-    <div class="level-item has-text-centered">
-        <div>
-            <p class="heading">License Expressions</p>
-            <p class="title">
-              <span>{{ detected_licenses.license_matches|length }}</span>
-            </p>
-        </div>
+  </div>
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">License Expressions</p>
+      <p class="title">
+        <span>{{ detected_licenses.license_matches|length }}</span>
+      </p>
     </div>
-    <div class="level-item has-text-centered">
-        <div>
-            <p class="heading">Licenses</p>
-            <p class="title">
-              <span>{{ detected_licenses.license_keys_count }}</span>
-            </p>
-        </div>
+  </div>
+  <div class="level-item has-text-centered">
+    <div>
+      <p class="heading">Licenses</p>
+      <p class="title">
+        <span>{{ detected_licenses.license_keys_count }}</span>
+      </p>
     </div>
+  </div>
 </nav>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_scan_form.html b/scantext/templates/scantext/license_scan_form.html
index 0c55122d8..6b16acf13 100644
--- a/scantext/templates/scantext/license_scan_form.html
+++ b/scantext/templates/scantext/license_scan_form.html
@@ -3,53 +3,51 @@
 
 {% block content %}
 <div class="container is-max-desktop">
-    {% include 'scanpipe/includes/navbar_header.html' %}
-    <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
+  {% include 'scanpipe/includes/navbar_header.html' %}
+  <div class="mx-5 mb-2">{% include 'scanpipe/includes/messages.html' %}</div>
 
-    <section class="mx-5 mb-4">
-        <div class="mb-4">
-            <h1 class="title is-4">Scan License</h1>
-        </div>
+  <section class="mx-5 mb-4">
+    <div class="mb-4">
+        <h1 class="title is-4">Scan License</h1>
+    </div>
 
-        <form action="{% url 'license_scan' %}" method="post" enctype="multipart/form-data">
-            {% csrf_token %}
-            {{ form.input_text }}
-            <div class="columns mt-1">
-                <div class="file has-name is-fullwidth column is-half">
-                  <label class="file-label">
-                    {{ form.input_file }}
-                    <span class="file-cta">
-                      <span class="file-icon">
-                        <i class="fas fa-upload"></i>
-                      </span>
-                      <span class="file-label-text">Choose a text file to scan…</span>
-                    </span>
-                    <span class="file-name is-hidden"></span>
-                  </label>
-                </div>
-                <div class="column is-half">
-                  <input type="submit" class="button is-fullwidth is-link" value="Scan License">
-                </div>
-            </div>
-        </form>
-    </section>
+    <form action="{% url 'license_scan' %}" method="post" enctype="multipart/form-data">
+      {% csrf_token %}
+      {{ form.input_text }}
+      <div class="columns mt-1">
+        <div class="file has-name is-fullwidth column is-half">
+          <label class="file-label">
+            {{ form.input_file }}
+            <span class="file-cta">
+              <span class="file-icon">
+                <i class="fas fa-upload"></i>
+              </span>
+              <span class="file-label-text">Choose a text file to scan…</span>
+            </span>
+            <span class="file-name is-hidden"></span>
+          </label>
+        </div>
+        <div class="column is-half">
+          <input type="submit" class="button is-fullwidth is-link" value="Scan License">
+        </div>
+      </div>
+    </form>
+  </section>
 </div>
 {% endblock %}
 
 {% block scripts %}
-<script>
+  <script type="text/javascript">
     const fileInput = document.querySelector('#id_input_file');
     fileInput.onchange = updateFile;
-    
+
     // Update the file name on upload
     function updateFile() {
       const fileName = document.querySelector('.file-name');
-      console.log(fileInput.files)
       if (fileInput.files.length > 0) {
         fileName.classList.remove('is-hidden')
         fileName.innerHTML = fileInput.files[0].name;
       }
     }
-
-</script>
+  </script>
 {% endblock %}
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 650065694..4c50174b8 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -2,55 +2,60 @@
 {% load static humanize %}
 
 {% block extrahead %}
-<style>
+  <style>
     {% for mcolor in detected_licenses.match_colors %}{{ mcolor }}{% endfor %}
 
     .license-match {
-        font-size: 18px;
-        white-space: pre-wrap;
-        max-height: 80vh;
-        overflow: scroll;
-        border: 1px solid #efefef;
-        border-radius: 4px;
+      font-size: 18px;
+      white-space: pre-wrap;
+      overflow: scroll;
+      border: 1px solid #efefef;
+      border-radius: 4px;
     }
-</style>
+
+    .panel {
+      position: sticky;
+      top: 3vh;
+    }
+  </style>
 {% endblock %}
 
 {% block content %}
-<div class="container is-widescreen">
-    {% include 'scanpipe/includes/navbar_header.html' %}
+<div class="container is-fullwidth">
+  {% include 'scanpipe/includes/navbar_header.html' %}
 
-    {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
-    <hr class="mx-1">
+  {% include 'scantext/includes/license_summary_header.html' with detected_licenses=detected_licenses %}
+  <hr class="mx-1">
 
-    <section class="tab-container">
-        {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
-    </section>
+  <section class="tab-container">
+    {% include 'scantext/includes/license_summary_detail.html' with detected_licenses=detected_licenses %}
+  </section>
 
 </div>
 {% endblock %}
 
 {% block scripts %}
-<script type="text/javascript">
+  <script type="text/javascript">
     const detailBtns = document.querySelectorAll('.license-details-btn')
     const modalCards = document.querySelectorAll('.license-details-modal')
     const closeModalBtns = document.querySelectorAll('.license-details-close-modal')
 
     detailBtns.forEach((btn, index) => {
-        btn.addEventListener('click', (e) => {
-            e.preventDefault()
-            modalCards.forEach(modalcard => {
-              modalcard.style.display= 'none'
-            })
-            modalCards[index].style.display= 'block'
+      btn.addEventListener('click', (e) => {
+        e.preventDefault()
+        modalCards.forEach(modalcard => {
+          modalcard.style.display= 'none'
         })
+        modalCards[index].style.display= 'block'
+      })
     })
 
     closeModalBtns.forEach((btn, index) => {
-        btn.addEventListener('click', (e) => {
-            e.preventDefault()
-            modalCards[index].style.display = 'none'
-        })
+      btn.addEventListener('click', (e) => {
+        e.preventDefault()
+        modalCards[index].style.display = 'none'
+      })
     })
-</script>
+
+  </script>
 {% endblock %}
\ No newline at end of file
diff --git a/scantext/tests/test_views.py b/scantext/tests/test_views.py
index a716fa89f..c72619794 100644
--- a/scantext/tests/test_views.py
+++ b/scantext/tests/test_views.py
@@ -14,8 +14,9 @@
 from licensedcode import models
 from licensedcode.spans import Span
 
-from scantext.views import get_license_keys_count
-from scantext.views import get_rule_text_url
+from scantext.match_text import get_build_colors
+from scantext.match_text import get_license_keys_count
+from scantext.match_text import get_rule_text_url
 
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
 
@@ -35,42 +36,44 @@ def test_get_license_keys_count(self):
         match1 = match.LicenseMatch(rule=rule1, ispan=Span(), qspan=Span())
         match2 = match.LicenseMatch(rule=rule2, ispan=Span(), qspan=Span())
         match3 = match.LicenseMatch(rule=rule3, ispan=Span(), qspan=Span())
-
         matches = [match1, match2, match3]
-        assert get_license_keys_count(matches) == 4
 
-    def test_get_rule_text_url__for_rule(self):
+        response = get_license_keys_count(matches)
+        expected = 4
+        self.assertEqual(response, expected)
+
+    def test_get_rule_text_url_for_rule(self):
         rule1 = models.Rule(license_expression="Apache-2.0", stored_text="1")
         rule1.identifier = "Apache-2.0.RULE"
-        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
 
-        assert result == "http://example.com/rules/Apache-2.0.RULE", result
+        response = get_rule_text_url(rule=rule1, base_url="http://example.com")
+        expected = "http://example.com/rules/Apache-2.0.RULE"
+        self.assertEqual(response, expected)
 
-    def test_get_rule_text_url__for_license(self):
+    def test_get_rule_text_url_for_license(self):
         rule1 = models.Rule(
             license_expression="Apache-2.0", stored_text="1", is_from_license=True
         )
         rule1.identifier = "Apache-2.0.LICENSE"
-        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
 
-        assert result == "http://example.com/licenses/Apache-2.0.LICENSE", result
+        response = get_rule_text_url(rule=rule1, base_url="http://example.com")
+        expected = "http://example.com/licenses/Apache-2.0.LICENSE"
+        self.assertEqual(response, expected)
 
-    def test_get_rule_text_url__for_spdx(self):
+    def test_get_rule_text_url_for_spdx(self):
         rule1 = models.SpdxRule(license_expression="Apache-2.0", stored_text="1")
-        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
-
-        assert not result
+        response = get_rule_text_url(rule=rule1, base_url="http://example.com")
+        self.assertIsNone(response)
 
-    def test_get_rule_text_url__for_unknown(self):
+    def test_get_rule_text_url_for_unknown(self):
         rule1 = models.UnknownRule(license_expression="Apache-2.0", stored_text="1")
-        result = get_rule_text_url(rule=rule1, base_url="http://example.com")
+        response = get_rule_text_url(rule=rule1, base_url="http://example.com")
+        self.assertIsNone(response)
 
-        assert not result
-
-    def test_get_rule_text_url__with_default_base_url(self):
+    def test_get_rule_text_url_with_default_base_url(self):
         rule1 = models.Rule(license_expression="apache-2.0 or mit", stored_text="1")
         rule1.identifier = "apache-2.0_or_mit_48.RULE"
-        result = get_rule_text_url(rule=rule1)
-        expected = SCANCODE_BASE_URL + "/rules/apache-2.0_or_mit_48.RULE"
 
-        assert result == expected
+        response = get_rule_text_url(rule=rule1)
+        expected = SCANCODE_BASE_URL + "/rules/apache-2.0_or_mit_48.RULE"
+        self.assertEqual(response, expected)
diff --git a/scantext/views.py b/scantext/views.py
index 7b8ef1cec..81f7826f2 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -20,34 +20,13 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
-import sys
 import tempfile
-from pprint import pprint
 
-from django.conf import settings
 from django.contrib import messages
 from django.shortcuts import render
 
-import attr
-from licensedcode import models
-from licensedcode import query
-from licensedcode.spans import Span
-from licensedcode.stopwords import STOPWORDS
-from licensedcode.tokenize import index_tokenizer
-from licensedcode.tokenize import matched_query_text_tokenizer
-
 from scantext.forms import LicenseScanForm
-from scantext.match_text import tokenize_matched_text
-
-TRACE_HIGHLIGHTED_TEXT = True
-
-SCANCODE_BASE_URL = (
-    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
-)
-SCANCODE_REPO_URL = "https://github.com/nexB/scancode-toolkit"
-SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
-DEJACODE_LICENSE_URL = "https://enterprise.dejacode.com/urn/urn:dje:license:{}"
-SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
+from scantext.match_text import get_licenses
 
 
 def license_scanview(request):
@@ -89,18 +68,17 @@ def license_scanview(request):
 
     # The flush in tempfile is required to ensure that the content is
     # written to the disk before it's read by get_licenses function
-    # TODO: check this to handle input files
-    # https://github.com/nexB/commoncode/blob/9131627677d3ef171ddc472991a5c4d4a3431ee3/src/commoncode/fileutils.py#L99
+    from commoncode.fileutils import get_temp_dir
 
+    temp_dir = get_temp_dir(prefix="scantext_")
     if input_text:
-        with tempfile.NamedTemporaryFile(mode="w") as temp_file:
+        with tempfile.NamedTemporaryFile(mode="w", dir=temp_dir) as temp_file:
             temp_file.write(input_text)
             temp_file.flush()
             expressions = get_licenses(location=temp_file.name)
     elif input_file:
         try:
-            # rework on how to handle temporary files.
-            with tempfile.NamedTemporaryFile(mode="w") as temp_file:
+            with tempfile.NamedTemporaryFile(mode="w", dir=temp_dir) as temp_file:
                 input_text = str(input_file.read(), "UTF-8")
                 temp_file.write(input_text)
                 temp_file.flush()
@@ -127,9 +105,6 @@ def license_scanview(request):
             },
         )
 
-    # import json
-    # print(json.dumps(expressions, indent=2))
-
     return render(
         request,
         "scantext/license_summary.html",
@@ -138,258 +113,3 @@ def license_scanview(request):
             "detected_licenses": expressions,
         },
     )
-
-
-def get_licenses(
-    location,
-    license_url_template=SCANCODE_LICENSEDB_URL,
-    **kwargs,
-):
-    """
-    Return a mapping of license match data from detecting license
-    in the file at ``location`` suitable for use in template.
-
-    The mapping can be empty if there are no matches.
-    """
-    from licensedcode.cache import get_index
-    from licensedcode.spans import Span
-
-    idx = get_index()
-
-    # gets matches from a license file
-    matches = idx.match(
-        location=location,
-        unknown_licenses=True,
-        **kwargs,
-    )
-
-    if not matches:
-        return {}
-
-    query = matches[0].query
-
-    # Assign a numeric id to every match.
-    matches_by_id = dict(enumerate(matches))
-
-    del matches
-
-    license_matches = []
-
-    for mid, match in matches_by_id.items():
-        license_matches.append(
-            get_match_details(
-                mid=mid,
-                match=match,
-                license_url_template=license_url_template,
-                spdx_license_url=SPDX_LICENSE_URL,
-            )
-        )
-
-    license_tokens = get_license_tokens(
-        query=query,
-        matches_by_id=matches_by_id,
-        stopwords=STOPWORDS,
-        trace=TRACE_HIGHLIGHTED_TEXT,
-    )
-
-    match_colors = build_colors(matches_by_id=matches_by_id)
-    # print(match_colors)
-
-    return {
-        "license_matches": license_matches,
-        "license_tokens": license_tokens,
-        "match_colors": match_colors,
-        "license_keys_count": get_license_keys_count(matches=matches_by_id.values()),
-        "percentage_of_license_text": get_percentage_of_license_text(
-            query=query, matches=matches_by_id.values()
-        ),
-    }
-
-
-def get_percentage_of_license_text(query, matches):
-    """
-    Return percentage of license text matched in ``query`` Query by
-    a list of ``matches`` percentage is a float between 0 and 100.
-    """
-
-    # TODO: percentage of license text should be done by scancode-toolkit.
-    if not matches:
-        return 0
-
-    qspans = (match.qspan for match in matches)
-
-    matched_tokens_length = len(Span().union(*qspans))
-    query_tokens_length = query.tokens_length(with_unknown=True)
-    return round((matched_tokens_length / query_tokens_length) * 100, 2)
-
-
-def get_license_tokens(
-    query,
-    matches_by_id,
-    stopwords=STOPWORDS,
-    trace=TRACE_HIGHLIGHTED_TEXT,
-):
-    """
-    Return a list of tokens from the list of ``matches`` in ``query``.
-    """
-    # Token(value="", pos=3, is_text=True, is_matched=True, match_ids=[mid, mid, mid])
-    tokens = list(
-        tokenize_matched_text(
-            location=query.location,
-            query_string=query.query_string,
-            dictionary=query.idx.dictionary,
-            start_line=query.start_line,
-        )
-    )
-
-    for mid, match in matches_by_id.items():
-        tag_matched_tokens(tokens=tokens, match_qspan=match.qspan, mid=mid)
-
-    return tokens
-
-
-def tag_matched_tokens(tokens, match_qspan, mid):
-    """
-    Tag an iterable of ``tokens`` tagging each token with ``mid`` match id
-    if matched meaning the token is in the ``match_qspan``.
-    """
-    previous_is_matched = False
-    for tok in tokens:
-        if previous_is_matched and not tok.is_text:
-            tok.match_ids.append(mid)
-            tok = attr.evolve(tok, is_matched=True)
-            previous_is_matched = False
-        elif tok.pos != -1 and tok.is_known and tok.pos in match_qspan:
-            tok.match_ids.append(mid)
-            tok = attr.evolve(tok, is_matched=True)
-            previous_is_matched = True
-
-
-def build_colors(matches_by_id):
-    """
-    Return a mapping of mid to css color code.
-
-    .matched1 {background-color: rgba(30, 220, 90, 0.3);}
-    .matched2 {background-color: rgba(30, 90, 220, 0.3);}
-    .matched3 {background-color: rgba(220, 90, 30, 0.3);}
-    """
-    return [
-        f""".matched{mid} {{background-color: rgba(
-        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255},
-        0.3);}}"""
-        for mid in matches_by_id
-    ]
-
-
-def get_match_details(
-    mid,
-    match,
-    license_url_template,
-    spdx_license_url,
-):
-    """
-    Return a mapping of license data built from a LicenseMatch ``match``.
-    """
-    from licensedcode import cache
-
-    licenses = cache.get_licenses_db()
-
-    # TODO: decide whether the text should be highlighted or not.
-    matched_text = match.matched_text(whole_lines=False, highlight=False)
-
-    SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + "/{}.LICENSE"
-    SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + "/{}.yml"
-
-    result = {}
-
-    result["mid"] = mid
-    # Detection Level Information
-    result["score"] = match.score()
-    result["start_line"] = match.start_line
-    result["end_line"] = match.end_line
-    result["matched_length"] = match.len()
-    result["match_coverage"] = match.coverage()
-    result["matcher"] = match.matcher
-
-    # LicenseDB Level Information (Rule that was matched)
-    result["license_expression"] = match.rule.license_expression
-    result["rule_text_url"] = get_rule_text_url(match.rule)
-    result["rule_identifier"] = match.rule.identifier
-    result["referenced_filenames"] = match.rule.referenced_filenames
-    result["is_license_text"] = match.rule.is_license_text
-    result["is_license_notice"] = match.rule.is_license_notice
-    result["is_license_reference"] = match.rule.is_license_reference
-    result["is_license_tag"] = match.rule.is_license_tag
-    result["is_license_intro"] = match.rule.is_license_intro
-    result["rule_length"] = match.rule.length
-    result["rule_relevance"] = match.rule.relevance
-    result["matched_text"] = matched_text
-
-    # License Level Information (Individual licenses that this rule refers to)
-    result["licenses"] = detected_licenses = []
-    for license_key in match.rule.license_keys():
-        detected_license = {}
-        detected_licenses.append(detected_license)
-
-        lic = licenses.get(license_key)
-
-        detected_license["key"] = lic.key
-        detected_license["name"] = lic.name
-        detected_license["short_name"] = lic.short_name
-        detected_license["category"] = lic.category
-        detected_license["is_exception"] = lic.is_exception
-        detected_license["is_unknown"] = lic.is_unknown
-        detected_license["owner"] = lic.owner
-        detected_license["homepage_url"] = lic.homepage_url
-        detected_license["text_url"] = lic.text_urls[0] if lic.text_urls else ""
-        detected_license["reference_url"] = license_url_template.format(lic.key)
-        detected_license["scancode_text_url"] = SCANCODE_LICENSE_TEXT_URL.format(
-            lic.key
-        )
-        detected_license["scancode_data_url"] = SCANCODE_LICENSE_DATA_URL.format(
-            lic.key
-        )
-
-        spdx_key = lic.spdx_license_key
-        detected_license["spdx_license_key"] = spdx_key
-
-        if spdx_key:
-            is_license_ref = spdx_key.lower().startswith("licenseref-")
-            if is_license_ref:
-                spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
-            else:
-                # TODO: Is this replacing spdx_key???
-                spdx_key = lic.spdx_license_key.rstrip("+")
-                spdx_url = spdx_license_url.format(spdx_key)
-        else:
-            spdx_url = ""
-        detected_license["spdx_url"] = spdx_url
-
-    return result
-
-
-def get_license_keys_count(matches):
-    """
-    Return the number of unique license keys found in a list of license matches.
-    """
-    keys = set()
-    for match in matches:
-        keys.update(match.rule.license_keys())
-
-    return len(keys)
-
-
-def get_rule_text_url(rule, base_url=SCANCODE_BASE_URL):
-    """
-    Return a URL to the text file of a ``rule`` Rule.
-    Return None if there is no URL for the ``rule``.
-    """
-
-    if isinstance(rule, (models.SpdxRule, models.UnknownRule)):
-        return
-
-    if rule.is_from_license:
-        return f"{base_url}/licenses/{rule.identifier}"
-
-    else:
-        return f"{base_url}/rules/{rule.identifier}"

From 3def7117ad4647af0cad89de33fba5bd26079b8c Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 19 Sep 2022 12:41:27 +0530
Subject: [PATCH 49/59] Add more licenses, highlight text properly #450

* Added a few more licenses to test
* Corrected text highlighting
* Added more styles to highlights

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        | 21 ++++++++++++++++---
 .../includes/license_summary_detail.html      |  4 ++--
 .../templates/scantext/license_summary.html   |  5 ++++-
 scantext/tests/data/LICENSES                  |  8 ++++++-
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 615fe3fb6..97d4842d4 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -78,6 +78,8 @@ class Token:
     # List of LicenseMatch ids that match this token
     match_ids = attr.ib(attr.Factory(list))
 
+    match_rules = attr.ib(default=None)
+
 
 def get_match_details(mid, match, license_url_template, spdx_license_url):
     """
@@ -207,6 +209,16 @@ def get_licenses(location, license_url_template=SCANCODE_LICENSEDB_URL, **kwargs
         stopwords=STOPWORDS,
     )
 
+    for tkn in license_tokens:
+        if tkn.match_ids:
+            # Tooltip text: the expression of every match covering this token.
+            tkn.match_rules = ", ".join(
+                license_matches[mid]["license_expression"]
+                for mid in tkn.match_ids
+            )
+        else:
+            tkn.match_rules = "No match found."
+
     match_colors = get_build_colors(matches_by_id=matches_by_id)
 
     return {
@@ -347,9 +359,12 @@ def get_build_colors(matches_by_id):
     .matched3 {background-color: rgba(220, 90, 30, 0.3);}
     """
     return [
-        f""".matched{mid} {{background-color: rgba(
-        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255},
-        0.3);}}"""
+        f""".matched{mid} {{
+        background-color: rgba(
+        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.3);
+        border-bottom: 3px solid rgba(
+        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.7);
+        }}"""
         for mid in matches_by_id
     ]
 
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 9ee936055..dfc8686f5 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -7,7 +7,7 @@
         <div class="card-header-title"><span class="has-text-link mr-2">{{ license.mid|add:1 }}.</span> {{ license.license_expression }}</div>
         <div class="is-flex is-justify-content-row is-align-items-center">
           <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}%</p>
-          <div class="dropdown is-hoverable">
+          <div class="dropdown is-hoverable" title="Click to scrolldown to the matched line.">
             <a href="#{{ license.mid }}">
               <p class="card-header-icon">
                 <span class="icon">
@@ -63,6 +63,6 @@
   </div>
   <div class="column is-two-third">
     <p class="title is-5">Input Text</p>
-    <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="license-token matched{{ token.match_ids.0 }}" id="{{ token.match_ids.0 }}">{{ token.value }}</span>{% endfor %}</div>
+    <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="license-token matched{{ token.match_ids.0 }}" id="{{ token.match_ids.0 }}" title="{{ token.match_rules }}">{{ token.value }}</span>{% endfor %}</div>
   </div>
 </div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 4c50174b8..b84529ba4 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -6,11 +6,14 @@
     {% for mcolor in detected_licenses.match_colors %}{{ mcolor }}{% endfor %}
 
     .license-match {
-      font-size: 18px;
+      max-height: 90vh;
       white-space: pre-wrap;
       overflow: scroll;
       border: 1px solid #efefef;
       border-radius: 4px;
+      color: rgb(14, 16, 26);
+      font-size: 16px;
+      font-weight: 400;
     }
 
     .panel {
diff --git a/scantext/tests/data/LICENSES b/scantext/tests/data/LICENSES
index 1866e7ba7..4e2825c9b 100644
--- a/scantext/tests/data/LICENSES
+++ b/scantext/tests/data/LICENSES
@@ -746,4 +746,10 @@ FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 
 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
\ No newline at end of file
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Apache-2.0
+
+MIT
+
+Lesser GPL

From ebcb6e2e15d1a25a81d7e6607c6e4eea0e52ceea Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 19 Sep 2022 12:55:57 +0530
Subject: [PATCH 50/59] Set cursor style to help, add token attr desc #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                           | 1 +
 scantext/templates/scantext/license_summary.html | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 97d4842d4..6053f06ac 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -78,6 +78,7 @@ class Token:
     # List of LicenseMatch ids that match this token
     match_ids = attr.ib(attr.Factory(list))
 
+    # Rules collected from license_matches using above match_ids
     match_rules = attr.ib(default=None)
 
 
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index b84529ba4..a719db606 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -14,6 +14,7 @@
       color: rgb(14, 16, 26);
       font-size: 16px;
       font-weight: 400;
+      cursor: help;
     }
 
     .panel {

From cda3a40caa806b553e71c9443585c9d1058b0710 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 21 Sep 2022 16:16:26 +0530
Subject: [PATCH 51/59] Connect left and right part of ui #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        |  3 +-
 .../includes/license_summary_detail.html      |  5 ++-
 .../templates/scantext/license_summary.html   | 45 +++++++++++++++++++
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 6053f06ac..22330b6b5 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -359,12 +359,11 @@ def get_build_colors(matches_by_id):
     .matched2 {background-color: rgba(30, 90, 220, 0.3);}
     .matched3 {background-color: rgba(220, 90, 30, 0.3);}
     """
+
     return [
         f""".matched{mid} {{
         background-color: rgba(
         {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.3);
-        border-bottom: 3px solid rgba(
-        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.7);
         }}"""
         for mid in matches_by_id
     ]
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index dfc8686f5..a9a5b36b9 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -3,9 +3,10 @@
     <p class="title is-5">Detected Licenses</p>
     <div class="panel is-light is-shadowless">
     {% for license in detected_licenses.license_matches %}
-      <div class="panel-block is-flex is-justify-content-space-between p-0 matched{{ license.mid }}">
+      <div class="license-block panel-block is-flex is-justify-content-space-between p-0" data-match="matched{{ license.mid }}">
         <div class="card-header-title"><span class="has-text-link mr-2">{{ license.mid|add:1 }}.</span> {{ license.license_expression }}</div>
         <div class="is-flex is-justify-content-row is-align-items-center">
+          <p class="selected tag is-info is-hidden">selected</p>
           <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}%</p>
           <div class="dropdown is-hoverable" title="Click to scrolldown to the matched line.">
             <a href="#{{ license.mid }}">
@@ -63,6 +64,6 @@
   </div>
   <div class="column is-two-third">
     <p class="title is-5">Input Text</p>
-    <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="license-token matched{{ token.match_ids.0 }}" id="{{ token.match_ids.0 }}" title="{{ token.match_rules }}">{{ token.value }}</span>{% endfor %}</div>
+    <div class="license-match">{% for token in detected_licenses.license_tokens %}<span class="license-token matched{{ token.match_ids.0 }}" data-id="{{ token.match_ids.0 }}" id="{{ token.match_ids.0 }}" title="{{ token.match_rules }}">{{ token.value }}</span>{% endfor %}</div>
   </div>
 </div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index a719db606..ab5e949a8 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -21,6 +21,10 @@
       position: sticky;
       top: 3vh;
     }
+
+    .underline {
+      border-bottom: 2px solid red;
+    }
   </style>
 {% endblock %}
 
@@ -41,6 +45,9 @@
 {% block scripts %}
   <script type="text/javascript">
     const detailBtns = document.querySelectorAll('.license-details-btn')
+    const panelBlocks = document.querySelectorAll('.license-block')
+    const panelBtns = document.querySelectorAll('.card-header-icon')
+    const tokens = document.querySelectorAll('.license-token')
     const modalCards = document.querySelectorAll('.license-details-modal')
     const closeModalBtns = document.querySelectorAll('.license-details-close-modal')
 
@@ -54,6 +61,44 @@
       })
     })
 
+    // Click on panels => highlight both panel and tokens
+
+    panelBtns.forEach((btn, index) => {
+      btn.addEventListener('click', (e) => {
+        panelBlocks.forEach(blk => {
+          blk.querySelector('.selected').classList.add('is-hidden')
+        })
+
+        tokens.forEach((tkn, ind) => {
+          tkn.classList.remove('underline')
+          id = tkn.getAttribute('data-id')
+          if (id == index) {
+            tkn.classList.add('underline')
+          }
+        })
+        panelBlocks[index].querySelector('.selected').classList.remove('is-hidden')
+      })
+    })
+
+    // Click on tokens => highlight both panel and tokens
+
+    tokens.forEach((tkn, index) => {
+      tkn.addEventListener('click', (e) => {
+        panelBlocks.forEach(blk => {
+          blk.querySelector('.selected').classList.add('is-hidden')
+        })
+        id = tkn.getAttribute('data-id') 
+        tokens.forEach((tn, ind) => {
+          tn.classList.remove('underline')
+          tid = tn.getAttribute('data-id')
+          if (id && id == tid) {
+            tn.classList.add('underline')
+          }
+        })
+        panelBlocks[id].querySelector('.selected').classList.remove('is-hidden')
+      })
+    })
+
     closeModalBtns.forEach((btn, index) => {
       btn.addEventListener('click', (e) => {
         e.preventDefault()

From 9c404cc247a78f13ef3f3708ec3c64e7ac806a6c Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 21 Sep 2022 17:27:17 +0530
Subject: [PATCH 52/59] Set predefined colors to 3 #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 22330b6b5..7b25f9108 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -353,21 +353,24 @@ def tag_matched_tokens(tokens, match_qspan, mid):
 
 def get_build_colors(matches_by_id):
     """
-    Return a mapping of mid to css color code.
+    Return a list of color codes.
 
     .matched1 {background-color: rgba(30, 220, 90, 0.3);}
     .matched2 {background-color: rgba(30, 90, 220, 0.3);}
     .matched3 {background-color: rgba(220, 90, 30, 0.3);}
     """
+    color_code_ids = []
 
-    return [
-        f""".matched{mid} {{
-        background-color: rgba(
-        {(244 * (mid+1)) % 255}, {(234 * (mid+1)) % 255}, {(130 * (mid+1)) % 255}, 0.3);
-        }}"""
-        for mid in matches_by_id
+    color_codes = [
+        ".matched{} {{background-color: rgba(30, 220, 90, 0.3);}}",
+        ".matched{} {{background-color: rgba(30, 90, 220, 0.3);}}",
+        ".matched{} {{background-color: rgba(220, 90, 30, 0.3);}}",
     ]
 
+    for mid in matches_by_id:
+        color_code_ids.append(color_codes[mid % 3].format(mid))
+    return color_code_ids
+
 
 def get_percentage_of_license_text(query, matches):
     """

From 6fab923a134741b5ceb7605ca1457487bc360c45 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 26 Sep 2022 14:33:19 +0530
Subject: [PATCH 53/59] Add more colors & new test licenses

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        |  13 ++-
 .../includes/license_summary_detail.html      |  13 ++-
 .../templates/scantext/license_summary.html   |  37 +++---
 scantext/tests/data/complexLICENSES           | 106 ++++++++++++++++++
 scantext/tests/data/miniLICENSES              |   5 +
 5 files changed, 150 insertions(+), 24 deletions(-)
 create mode 100644 scantext/tests/data/complexLICENSES
 create mode 100644 scantext/tests/data/miniLICENSES

diff --git a/scantext/match_text.py b/scantext/match_text.py
index 7b25f9108..c85adc42f 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -362,13 +362,18 @@ def get_build_colors(matches_by_id):
     color_code_ids = []
 
     color_codes = [
-        ".matched{} {{background-color: rgba(30, 220, 90, 0.3);}}",
-        ".matched{} {{background-color: rgba(30, 90, 220, 0.3);}}",
-        ".matched{} {{background-color: rgba(220, 90, 30, 0.3);}}",
+        ".matched{} {{background-color: rgba(255, 176, 0, 1);}}",
+        ".matched{} {{background-color: rgba(98, 160, 234, 1);}}",
+        ".matched{} {{background-color: rgba(100, 143, 255, 1);}}",
+        ".matched{} {{background-color: rgba(120, 94, 240, 1);}}",
+        ".matched{} {{background-color: rgba(220, 38, 127, 1);}}",
+        ".matched{} {{background-color: rgba(254, 97, 0, 1);}}",
+        ".matched{} {{background-color: rgba(204, 121, 167, 1);}}",
+        ".matched{} {{background-color: rgba(68, 170, 153, 1);}}",
     ]
 
     for mid in matches_by_id:
-        color_code_ids.append(color_codes[mid % 3].format(mid))
+        color_code_ids.append(color_codes[mid % len(color_codes)].format(mid))
     return color_code_ids
 
 
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index a9a5b36b9..6ed4d05ac 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -4,18 +4,19 @@
     <div class="panel is-light is-shadowless">
     {% for license in detected_licenses.license_matches %}
       <div class="license-block panel-block is-flex is-justify-content-space-between p-0" data-match="matched{{ license.mid }}">
-        <div class="card-header-title"><span class="has-text-link mr-2">{{ license.mid|add:1 }}.</span> {{ license.license_expression }}</div>
+        <a class="atag" href="#{{ license.mid }}">
+          <div class="card-header-title">
+            <span class="mr-2">{{ license.mid|add:1 }}.</span>{{ license.license_expression }}
+          </div>
+        </a>
         <div class="is-flex is-justify-content-row is-align-items-center">
-          <p class="selected tag is-info is-hidden">selected</p>
           <p class="tag is-6 mx-1 is-light {% if license.score == 100 %} is-success {% else %} is-warning {% endif %}">{{ license.score }}%</p>
-          <div class="dropdown is-hoverable" title="Click to scrolldown to the matched line.">
-            <a href="#{{ license.mid }}">
+          <div class="dropdown is-hoverable is-right">
               <p class="card-header-icon">
                 <span class="icon">
-                  <i class="fa fa-angle-down" aria-hidden="true"></i>
+                  <i class="fa fa-info-circle" aria-hidden="true"></i>
                 </span>
               </p>
-            </a>
             <div class="dropdown-menu">
               <div class="dropdown-content py-0">
                 <table class="panel-block table is-striped is-size-6 is-hoverable mb-0" style="white-space: nowrap;">
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index ab5e949a8..996635806 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -9,10 +9,10 @@
       max-height: 90vh;
       white-space: pre-wrap;
       overflow: scroll;
-      border: 1px solid #efefef;
+      border: 1px solid #cdcdcd;
       border-radius: 4px;
       color: rgb(14, 16, 26);
-      font-size: 16px;
+      font-size: 18px;
       font-weight: 400;
       cursor: help;
     }
@@ -22,8 +22,16 @@
       top: 3vh;
     }
 
-    .underline {
-      border-bottom: 2px solid red;
+    .license-token {
+      opacity: 0.6;
+    }
+
+    .highlight {
+      opacity: 1;
+    }
+
+    a {
+      display: block;
     }
   </style>
 {% endblock %}
@@ -46,7 +54,7 @@
   <script type="text/javascript">
     const detailBtns = document.querySelectorAll('.license-details-btn')
     const panelBlocks = document.querySelectorAll('.license-block')
-    const panelBtns = document.querySelectorAll('.card-header-icon')
+    const panelBtns = document.querySelectorAll('.card-header-title')
     const tokens = document.querySelectorAll('.license-token')
     const modalCards = document.querySelectorAll('.license-details-modal')
     const closeModalBtns = document.querySelectorAll('.license-details-close-modal')
@@ -66,17 +74,18 @@
     panelBtns.forEach((btn, index) => {
       btn.addEventListener('click', (e) => {
         panelBlocks.forEach(blk => {
-          blk.querySelector('.selected').classList.add('is-hidden')
+          blk.classList.remove('has-background-info')
+          blk.classList.remove('has-text-white')
         })
 
         tokens.forEach((tkn, ind) => {
-          tkn.classList.remove('underline')
+          tkn.classList.remove('highlight')
           id = tkn.getAttribute('data-id')
-          if (id == index) {
-            tkn.classList.add('underline')
+          if (id && id == index) {
+            tkn.classList.add('highlight')
           }
         })
-        panelBlocks[index].querySelector('.selected').classList.remove('is-hidden')
+        panelBlocks[index].classList.add('has-background-info')
       })
     })
 
@@ -85,17 +94,17 @@
     tokens.forEach((tkn, index) => {
       tkn.addEventListener('click', (e) => {
         panelBlocks.forEach(blk => {
-          blk.querySelector('.selected').classList.add('is-hidden')
+          blk.classList.remove('has-background-info')
         })
         id = tkn.getAttribute('data-id') 
         tokens.forEach((tn, ind) => {
-          tn.classList.remove('underline')
+          tn.classList.remove('highlight')
           tid = tn.getAttribute('data-id')
           if (id && id == tid) {
-            tn.classList.add('underline')
+            tn.classList.add('highlight')
+            panelBlocks[tid].classList.add('has-background-info')
           }
         })
-        panelBlocks[id].querySelector('.selected').classList.remove('is-hidden')
       })
     })
 
diff --git a/scantext/tests/data/complexLICENSES b/scantext/tests/data/complexLICENSES
new file mode 100644
index 000000000..a0b64aea1
--- /dev/null
+++ b/scantext/tests/data/complexLICENSES
@@ -0,0 +1,106 @@
+#FFmpeg:
+
+Most files in FFmpeg are under the GNU Lesser General Public License version 2.1
+or later (LGPL v2.1+). Read the file COPYING.LGPLv2.1 for details. Some other
+files have MIT/X11/BSD-style licenses. In combination the LGPL v2.1+ applies to
+FFmpeg.
+
+Some optional parts of FFmpeg are licensed under the GNU General Public License
+version 2 or later (GPL v2+). See the file COPYING.GPLv2 for details. None of
+these parts are used by default, you have to explicitly pass --enable-gpl to
+configure to activate them. In this case, FFmpeg's license changes to GPL v2+.
+
+Specifically, the GPL parts of FFmpeg are:
+
+- libpostproc
+- libmpcodecs
+- optional x86 optimizations in the files
+  libavcodec/x86/flac_dsp_gpl.asm
+  libavcodec/x86/idct_mmx.c
+- libutvideo encoding/decoding wrappers in
+  libavcodec/libutvideo*.cpp
+- the X11 grabber in libavdevice/x11grab.c
+- the swresample test app in
+  libswresample/swresample-test.c
+- the texi2pod.pl tool
+- the following filters in libavfilter:
+    - f_ebur128.c
+    - vf_blackframe.c
+    - vf_boxblur.c
+    - vf_colormatrix.c
+    - vf_cropdetect.c
+    - vf_decimate.c
+    - vf_delogo.c
+    - vf_geq.c
+    - vf_histeq.c
+    - vf_hqdn3d.c
+    - vf_interlace.c
+    - vf_kerndeint.c
+    - vf_mcdeint.c
+    - vf_mp.c
+    - vf_owdenoise.c
+    - vf_perspective.c
+    - vf_phase.c
+    - vf_pp.c
+    - vf_pullup.c
+    - vf_sab.c
+    - vf_smartblur.c
+    - vf_spp.c
+    - vf_stereo3d.c
+    - vf_super2xsai.c
+    - vf_tinterlace.c
+    - vsrc_mptestsrc.c
+
+Should you, for whatever reason, prefer to use version 3 of the (L)GPL, then
+the configure parameter --enable-version3 will activate this licensing option
+for you. Read the file COPYING.LGPLv3 or, if you have enabled GPL parts,
+COPYING.GPLv3 to learn the exact legal terms that apply in this case.
+
+There are a handful of files under other licensing terms, namely:
+
+* The files libavcodec/jfdctfst.c, libavcodec/jfdctint_template.c and
+  libavcodec/jrevdct.c are taken from libjpeg, see the top of the files for
+  licensing details. Specifically note that you must credit the IJG in the
+  documentation accompanying your program if you only distribute executables.
+  You must also indicate any changes including additions and deletions to
+  those three files in the documentation.
+  tests/reference.pnm is under the expat license
+
+
+external libraries
+==================
+
+FFmpeg can be combined with a number of external libraries, which sometimes
+affect the licensing of binaries resulting from the combination.
+
+compatible libraries
+--------------------
+
+The following libraries are under GPL:
+    - frei0r
+    - libcdio
+    - libutvideo
+    - libvidstab
+    - libx264
+    - libx265
+    - libxavs
+    - libxvid
+When combining them with FFmpeg, FFmpeg needs to be licensed as GPL as well by
+passing --enable-gpl to configure.
+
+The OpenCORE and VisualOn libraries are under the Apache License 2.0. That
+license is incompatible with the LGPL v2.1 and the GPL v2, but not with
+version 3 of those licenses. So to combine these libraries with FFmpeg, the
+license version needs to be upgraded by passing --enable-version3 to configure.
+
+incompatible libraries
+----------------------
+
+The Fraunhofer AAC library, FAAC and aacplus are under licenses which
+are incompatible with the GPLv2 and v3. We do not know for certain if their
+licenses are compatible with the LGPL.
+If you wish to enable these libraries, pass --enable-nonfree to configure.
+But note that if you enable any of these libraries the resulting binary will
+be under a complex license mix that is more restrictive than the LGPL and that
+may result in additional obligations. It is possible that these
+restrictions cause the resulting binary to be unredistributeable.
\ No newline at end of file
diff --git a/scantext/tests/data/miniLICENSES b/scantext/tests/data/miniLICENSES
new file mode 100644
index 000000000..ea5c84937
--- /dev/null
+++ b/scantext/tests/data/miniLICENSES
@@ -0,0 +1,5 @@
+Apache-2.0
+
+MIT
+
+Lesser GPL
\ No newline at end of file

From 4760050f3ddcb8a139c3d376d9e11400f880be76 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Wed, 28 Sep 2022 16:08:58 +0530
Subject: [PATCH 54/59] Add highlight color and get rid of modal #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py                        |  3 +-
 .../includes/license_detail_modal.html        | 66 -------------------
 .../includes/license_summary_detail.html      | 40 ++++++++---
 .../templates/scantext/license_summary.html   | 44 ++++---------
 4 files changed, 45 insertions(+), 108 deletions(-)
 delete mode 100644 scantext/templates/scantext/includes/license_detail_modal.html

diff --git a/scantext/match_text.py b/scantext/match_text.py
index c85adc42f..f7fa85e2c 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -363,7 +363,6 @@ def get_build_colors(matches_by_id):
 
     color_codes = [
         ".matched{} {{background-color: rgba(255, 176, 0, 1);}}",
-        ".matched{} {{background-color: rgba(98, 160, 234, 1);}}",
         ".matched{} {{background-color: rgba(100, 143, 255, 1);}}",
         ".matched{} {{background-color: rgba(120, 94, 240, 1);}}",
         ".matched{} {{background-color: rgba(220, 38, 127, 1);}}",
@@ -373,7 +372,7 @@ def get_build_colors(matches_by_id):
     ]
 
     for mid in matches_by_id:
-        color_code_ids.append(color_codes[mid % 8].format(mid))
+        color_code_ids.append(color_codes[mid % len(color_codes)].format(mid))
     return color_code_ids
 
 
diff --git a/scantext/templates/scantext/includes/license_detail_modal.html b/scantext/templates/scantext/includes/license_detail_modal.html
deleted file mode 100644
index b85171ce5..000000000
--- a/scantext/templates/scantext/includes/license_detail_modal.html
+++ /dev/null
@@ -1,66 +0,0 @@
-<div class="modal license-details-modal">
-  <div class="modal-background"></div>
-  <div class="modal-card" style="margin-top: 10vh">
-    <header class="modal-card-head">
-      <p class="modal-card-title">{{ license.license_expression }}</p>
-      <button class="delete license-details-close-modal" aria-label="close"></button>
-    </header>
-    <section class="modal-card-body is-4by4">
-      <table class="table is-striped is-hoverable is-fullwidth is-size-6">
-        <tbody>
-          <tr>
-            <td><strong>Score</strong></td>
-            <td>{{ license.score }}</td>
-          </tr>
-          <tr>
-            <td><strong>Matched Line(s)</strong></td>
-            <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
-          </tr>
-          <tr>
-            <td><strong>Rule Identifier</strong></td>
-            <td>
-            {% if license.rule_text_url %}
-                <a href="{{ license.rule_text_url }}" target="_blank">{{ license.rule_identifier }}</a>
-            {% else %}
-                {{ license.rule_identifier }}
-            {% endif %}
-            </td>
-          </tr>
-          <tr>
-            <td><strong>Matcher</strong></td>
-            <td>{{ license.matcher }}</td>
-          </tr>
-          <tr>
-            <td><strong>Match Coverage</strong></td>
-            <td>{{ license.match_coverage }}</td>
-          </tr>
-          <tr>
-            <td><strong>Matched Length</strong></td>
-            <td>{{ license.matched_length }}</td>
-          </tr>
-          <tr>
-            <td><strong>Key(s)</strong></td>
-            <td>
-            {% for key in license.licenses %}
-                <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
-            {% endfor %}
-            </td>
-          </tr>
-          <tr>
-            <td><strong>Rule Relevance</strong></td>
-            <td>{{ license.rule_relevance }}</td>
-          </tr>
-          <tr>
-            <td><strong>Rule Length</strong></td>
-            <td>{{ license.rule_length }}</td>
-          </tr>
-        </tbody>
-      </table>
-    </section>
-    <footer class="modal-card-foot">
-      <button class="button is-outlined  has-text-weight-semibold">
-        {% include 'scantext/includes/license_report.html' with license=license %}
-      </button>
-    </footer>
-  </div>
-</div>
\ No newline at end of file
diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index 6ed4d05ac..f1ed5d101 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -30,28 +30,48 @@
                   </tr>
                   <tr>
                     <td><strong>Matched Line(s)</strong></td>
-                    <td>{% if license.start_line == license.end_line %}{{ license.start_line }}{% else %}{{ license.start_line }}-{{ license.end_line }} {% endif %}</td>
+                    <td>{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Rule Identifier</strong></td>
+                    <td>
+                    {% if license.rule_text_url %}
+                        <a href="{{ license.rule_text_url }}" target="_blank">{{ license.rule_identifier }}</a>
+                    {% else %}
+                        {{ license.rule_identifier }}
+                    {% endif %}
+                    </td>
+                  </tr>
+                  <tr>
+                    <td><strong>Matcher</strong></td>
+                    <td>{{ license.matcher }}</td>
+                  </tr>
+                  <tr>
+                    <td><strong>Match Coverage</strong></td>
+                    <td>{{ license.match_coverage }}</td>
                   </tr>
                   <tr>
                     <td><strong>Matched Length</strong></td>
                     <td>{{ license.matched_length }}</td>
                   </tr>
                   <tr>
-                    <td><strong>Matched Coverage</strong></td>
-                    <td>{{ license.match_coverage }}</td>
+                    <td><strong>Key(s)</strong></td>
+                    <td>
+                    {% for key in license.licenses %}
+                        <a href="{{ key.reference_url }}" target="_blank"><span class="mr-2">{{ key.key }}</span></a>
+                    {% endfor %}
+                    </td>
                   </tr>
                   <tr>
-                    <td><strong>Matcher</strong></td>
-                    <td>{{ license.matcher }}</td>
+                    <td><strong>Rule Relevance</strong></td>
+                    <td>{{ license.rule_relevance }}</td>
                   </tr>
                   <tr>
-                    <td><strong>Rule Identifer</strong></td>
-                    <td>{{ license.rule_identifier }}</td>
+                    <td><strong>Rule Length</strong></td>
+                    <td>{{ license.rule_length }}</td>
                   </tr>
                 </table>
-                <div class="panel-heading py-2 is-size-7 is-dark is-flex is-justify-content-space-between has-background-white">
-                  <span class="license-details-btn has-text-link" style="cursor: pointer;">View more details</span>
-                  {% include 'scantext/includes/license_detail_modal.html' with license=license %}
+                <div class="panel-heading py-2 is-size-7 is-dark is-flex has-background-white">
                   <!-- report license hyperlink -->
                   {% include 'scantext/includes/license_report.html' with license=license %}
                 </div>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index 996635806..df0084dbf 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -12,9 +12,10 @@
       border: 1px solid #cdcdcd;
       border-radius: 4px;
       color: rgb(14, 16, 26);
-      font-size: 18px;
+      font-size: 16px;
       font-weight: 400;
       cursor: help;
+      padding: 20px;
     }
 
     .panel {
@@ -23,7 +24,7 @@
     }
 
     .license-token {
-      opacity: 0.6;
+      opacity: 0.3;
     }
 
     .highlight {
@@ -52,40 +53,27 @@
 
 {% block scripts %}
   <script type="text/javascript">
-    const detailBtns = document.querySelectorAll('.license-details-btn')
     const panelBlocks = document.querySelectorAll('.license-block')
     const panelBtns = document.querySelectorAll('.card-header-title')
     const tokens = document.querySelectorAll('.license-token')
-    const modalCards = document.querySelectorAll('.license-details-modal')
-    const closeModalBtns = document.querySelectorAll('.license-details-close-modal')
-
-    detailBtns.forEach((btn, index) => {
-      btn.addEventListener('click', (e) => {
-        e.preventDefault()
-        modalCards.forEach(modalcard => {
-          modalcard.style.display= 'none'
-        })
-        modalCards[index].style.display= 'block'
-      })
-    })
 
     // Click on panels => highlight both panel and tokens
 
-    panelBtns.forEach((btn, index) => {
-      btn.addEventListener('click', (e) => {
-        panelBlocks.forEach(blk => {
-          blk.classList.remove('has-background-info')
-          blk.classList.remove('has-text-white')
+    panelBtns.forEach((block, index) => {
+      block.addEventListener('click', (e) => {
+        panelBlocks.forEach((blk, idx) => {
+          blk.classList.remove(`matched${idx%7}`)
         })
 
         tokens.forEach((tkn, ind) => {
           tkn.classList.remove('highlight')
           id = tkn.getAttribute('data-id')
           if (id && id == index) {
+            color_id = id%7
             tkn.classList.add('highlight')
+            panelBlocks[id].classList.add(`matched${color_id}`)
           }
         })
-        panelBlocks[index].classList.add('has-background-info')
       })
     })
 
@@ -93,27 +81,23 @@
 
     tokens.forEach((tkn, index) => {
       tkn.addEventListener('click', (e) => {
-        panelBlocks.forEach(blk => {
-          blk.classList.remove('has-background-info')
+        panelBlocks.forEach((blk, idx) => {
+          blk.classList.remove(`matched${idx%7}`)
         })
+
         id = tkn.getAttribute('data-id') 
         tokens.forEach((tn, ind) => {
           tn.classList.remove('highlight')
           tid = tn.getAttribute('data-id')
           if (id && id == tid) {
+            color_id = tid%7
             tn.classList.add('highlight')
-            panelBlocks[tid].classList.add('has-background-info')
+            panelBlocks[tid].classList.add(`matched${color_id}`)
           }
         })
       })
     })
 
-    closeModalBtns.forEach((btn, index) => {
-      btn.addEventListener('click', (e) => {
-        e.preventDefault()
-        modalCards[index].style.display = 'none'
-      })
-    })
 
   </script>
 {% endblock %}
\ No newline at end of file

From 45acd642eab3b3f4efc60c08d150545985d6041c Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 17 Oct 2022 14:35:11 +0530
Subject: [PATCH 55/59] Fix failing tests #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/tests/test_views.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scantext/tests/test_views.py b/scantext/tests/test_views.py
index c72619794..e38021221 100644
--- a/scantext/tests/test_views.py
+++ b/scantext/tests/test_views.py
@@ -23,15 +23,17 @@
 SCANCODE_BASE_URL = (
     "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data"
 )
+SPDX_LICENSE_URL = "https://spdx.org/licenses/{}"
+SCANCODE_LICENSEDB_URL = "https://scancode-licensedb.aboutcode.org/{}"
 
 
 class TestScantextViews(FileBasedTesting):
     test_data_dir = TEST_DATA_DIR
 
     def test_get_license_keys_count(self):
-        rule1 = models.Rule(license_expression="Apache-2.0", stored_text="1")
-        rule2 = models.Rule(license_expression="Apache-2.0 OR MIT", stored_text="2")
-        rule3 = models.Rule(license_expression="BSD AND GPL", stored_text="3")
+        rule1 = models.Rule(license_expression="Apache-2.0", text="1")
+        rule2 = models.Rule(license_expression="Apache-2.0 OR MIT", text="2")
+        rule3 = models.Rule(license_expression="BSD AND GPL", text="3")
 
         match1 = match.LicenseMatch(rule=rule1, ispan=Span(), qspan=Span())
         match2 = match.LicenseMatch(rule=rule2, ispan=Span(), qspan=Span())
@@ -43,7 +45,7 @@ def test_get_license_keys_count(self):
         self.assertEqual(response, expected)
 
     def test_get_rule_text_url_for_rule(self):
-        rule1 = models.Rule(license_expression="Apache-2.0", stored_text="1")
+        rule1 = models.Rule(license_expression="Apache-2.0", text="1")
         rule1.identifier = "Apache-2.0.RULE"
 
         response = get_rule_text_url(rule=rule1, base_url="http://example.com")
@@ -52,7 +54,7 @@ def test_get_rule_text_url_for_rule(self):
 
     def test_get_rule_text_url_for_license(self):
         rule1 = models.Rule(
-            license_expression="Apache-2.0", stored_text="1", is_from_license=True
+            license_expression="Apache-2.0", text="1", is_from_license=True
         )
         rule1.identifier = "Apache-2.0.LICENSE"
 
@@ -61,17 +63,17 @@ def test_get_rule_text_url_for_license(self):
         self.assertEqual(response, expected)
 
     def test_get_rule_text_url_for_spdx(self):
-        rule1 = models.SpdxRule(license_expression="Apache-2.0", stored_text="1")
+        rule1 = models.SpdxRule(license_expression="Apache-2.0", text="1")
         response = get_rule_text_url(rule=rule1, base_url="http://example.com")
         self.assertIsNone(response)
 
     def test_get_rule_text_url_for_unknown(self):
-        rule1 = models.UnknownRule(license_expression="Apache-2.0", stored_text="1")
+        rule1 = models.UnknownRule(license_expression="Apache-2.0", text="1")
         response = get_rule_text_url(rule=rule1, base_url="http://example.com")
         self.assertIsNone(response)
 
     def test_get_rule_text_url_with_default_base_url(self):
-        rule1 = models.Rule(license_expression="apache-2.0 or mit", stored_text="1")
+        rule1 = models.Rule(license_expression="apache-2.0 or mit", text="1")
         rule1.identifier = "apache-2.0_or_mit_48.RULE"
 
         response = get_rule_text_url(rule=rule1)

From b71994894b15cf70f09a7ef7621aa704e5d2a6ba Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 17 Oct 2022 15:32:13 +0530
Subject: [PATCH 56/59] Fix width for dropdown #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../includes/license_summary_detail.html       |  2 +-
 .../templates/scantext/license_summary.html    | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/scantext/templates/scantext/includes/license_summary_detail.html b/scantext/templates/scantext/includes/license_summary_detail.html
index f1ed5d101..ba19754d0 100644
--- a/scantext/templates/scantext/includes/license_summary_detail.html
+++ b/scantext/templates/scantext/includes/license_summary_detail.html
@@ -19,7 +19,7 @@
               </p>
             <div class="dropdown-menu">
               <div class="dropdown-content py-0">
-                <table class="panel-block table is-striped is-size-6 is-hoverable mb-0" style="white-space: nowrap;">
+                <table class="table is-striped is-size-6 is-hoverable mb-0" >
                   <tr>
                     <td><strong>License Expression</strong></td>
                     <td>{{ license.license_expression }}</td>
diff --git a/scantext/templates/scantext/license_summary.html b/scantext/templates/scantext/license_summary.html
index df0084dbf..de12a3dd6 100644
--- a/scantext/templates/scantext/license_summary.html
+++ b/scantext/templates/scantext/license_summary.html
@@ -34,6 +34,24 @@
     a {
       display: block;
     }
+
+    .dropdown-content, table {
+       width: 330px;
+       max-width: 430px;
+    }
+
+    tr {
+      display: flex;
+      justify-content: stretch;
+    }
+
+    td {
+      flex: 1;
+    }
+
+    td:nth-child(2) {
+      word-break: break-all;
+    }
   </style>
 {% endblock %}
 

From dd2e8abfc514e16f8cd00799cd07a475fefd0ea7 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sun, 5 Feb 2023 17:58:11 +0530
Subject: [PATCH 57/59] Write input file either in chunks or text #450

This makes it possible to work with uploaded files that are
either text or of another type (for example, binary).

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/forms.py | 23 +++++++++++++++++++++++
 scantext/views.py | 30 ++++++------------------------
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/scantext/forms.py b/scantext/forms.py
index d08cf74cc..c01721749 100644
--- a/scantext/forms.py
+++ b/scantext/forms.py
@@ -20,6 +20,8 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
+import tempfile
+
 from django import forms
 
 
@@ -41,3 +43,24 @@ class LicenseScanForm(forms.Form):
             attrs={"class": "file-input", "multiple": False},
         ),
     )
+
+
+def handle_input_text(input_text, temp_dir):
+    # The flush in tempfile is required to ensure that the content is
+    # written to the disk before it's read by get_licenses function
+    with tempfile.NamedTemporaryFile(mode="w", dir=temp_dir, delete=False) as temp_file:
+        temp_file.write(input_text)
+        temp_file.flush()
+
+    return temp_file.name
+
+
+def handle_input_file(input_file, temp_dir):
+    # Save the input file to the temporary directory
+    with tempfile.NamedTemporaryFile(
+        mode="wb", dir=temp_dir, delete=False
+    ) as temp_file:
+        for chunk in input_file.chunks():
+            temp_file.write(chunk)
+
+    return temp_file.name
diff --git a/scantext/views.py b/scantext/views.py
index 81f7826f2..ce6daa328 100644
--- a/scantext/views.py
+++ b/scantext/views.py
@@ -20,12 +20,12 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
-import tempfile
-
 from django.contrib import messages
 from django.shortcuts import render
 
 from scantext.forms import LicenseScanForm
+from scantext.forms import handle_input_file
+from scantext.forms import handle_input_text
 from scantext.match_text import get_licenses
 
 
@@ -66,33 +66,15 @@ def license_scanview(request):
             },
         )
 
-    # The flush in tempfile is required to ensure that the content is
-    # written to the disk before it's read by get_licenses function
     from commoncode.fileutils import get_temp_dir
 
     temp_dir = get_temp_dir(prefix="scantext_")
     if input_text:
-        with tempfile.NamedTemporaryFile(mode="w", dir=temp_dir) as temp_file:
-            temp_file.write(input_text)
-            temp_file.flush()
-            expressions = get_licenses(location=temp_file.name)
+        file_path = handle_input_text(input_text, temp_dir)
     elif input_file:
-        try:
-            with tempfile.NamedTemporaryFile(mode="w", dir=temp_dir) as temp_file:
-                input_text = str(input_file.read(), "UTF-8")
-                temp_file.write(input_text)
-                temp_file.flush()
-                expressions = get_licenses(location=temp_file.name)
-        except UnicodeDecodeError:
-            message = "Please upload a valid text file."
-            messages.warning(request, message)
-            return render(
-                request,
-                "scantext/license_scan_form.html",
-                {
-                    "form": LicenseScanForm(),
-                },
-            )
+        file_path = handle_input_file(input_file, temp_dir)
+
+    expressions = get_licenses(location=file_path)
 
     if not expressions:
         message = "Could not detect any license."

From 5c30510b9526de87a601c257a7608e1980921dc1 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Sun, 5 Feb 2023 18:13:00 +0530
Subject: [PATCH 58/59] Move scan to utility dropdown #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 .../templates/scanpipe/includes/navbar_header.html  | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/scanpipe/templates/scanpipe/includes/navbar_header.html b/scanpipe/templates/scanpipe/includes/navbar_header.html
index 1293164b3..1e26aa6d4 100644
--- a/scanpipe/templates/scanpipe/includes/navbar_header.html
+++ b/scanpipe/templates/scanpipe/includes/navbar_header.html
@@ -9,9 +9,16 @@
       <a class="navbar-item" href="{% url 'project_list' %}">
         Projects
       </a>
-      <a class="navbar-item" href="{% url 'license_scan' %}">
-        Scan
-      </a>
+      <div class="navbar-item has-dropdown is-hoverable">
+        <a class="navbar-link">
+          Utilities
+        </a>
+        <div class="navbar-dropdown is-right">
+          <a class="navbar-item" href="{% url 'license_scan' %}">
+            Detect License
+          </a>
+        </div>
+      </div>
       <a class="navbar-item" href="https://scancodeio.readthedocs.org/" target="_blank">
         Documentation
       </a>

From 9787f4497307c610a71387856e21348db1470059 Mon Sep 17 00:00:00 2001
From: Akhil Raj <lf32.dev@gmail.com>
Date: Mon, 27 Feb 2023 14:22:44 +0530
Subject: [PATCH 59/59] (tests) code format is validated #450

Signed-off-by: Akhil Raj <lf32.dev@gmail.com>
---
 scantext/match_text.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scantext/match_text.py b/scantext/match_text.py
index f7fa85e2c..c4e0567a1 100644
--- a/scantext/match_text.py
+++ b/scantext/match_text.py
@@ -272,7 +272,6 @@ def tokenize_matched_text(location, query_string, dictionary, start_line=1):
             # is essential as we need to realign the query-time tokenization
             # with the full text to report proper matches.
             if is_text and token_str and token_str.strip():
-
                 # we retokenize using the query tokenizer:
                 # 1. to lookup for is_known tokens in the index dictionary
 
@@ -281,7 +280,6 @@ def tokenize_matched_text(location, query_string, dictionary, start_line=1):
                 # regular query tokenizer ignores case and punctuations).
                 qtokenized = list(index_tokenizer(token_str))
                 if not qtokenized:
-
                     yield Token(
                         value=token_str,
                         line_num=line_num,
@@ -324,7 +322,6 @@ def tokenize_matched_text(location, query_string, dictionary, start_line=1):
                             pos=p,
                         )
             else:
-
                 yield Token(
                     value=token_str,
                     line_num=line_num,

Name	{{ license.name }}
Score	{{ license.score }}
Owner	{{ license.owner }}
Category	{{ license.category }}
SPDX Key	{{ license.spdx_license_key }}
Reference	{{ license.reference_url }}
Name	{{ license.name }}	Key	{{ license.key }}
Score	{{ license.score }}
Owner	{{ license.owner }}	Name	{{ license.name }}
Short Name	{{ license.short_name }}
Lines	{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
Category	{{ license.category }}
SPDX Key	Reference	{{ license.reference_url }}
Exception	{{ license.is_exception }}
Is Unknown	{{ license.is_unknown }}
Owner	{{ license.owner }}
Homepage	{{ license.homepage_url }}
Text URL	{{ license.text_url }}
Scancode Text URL	{{ license.scancode_text_url }}
Scancode Data URL	{{ license.scancode_data_url }}
SPDX License Key	{{ license.spdx_license_key }}
Reference	{{ license.reference_url }}	SPDX URL	{{ license.spdx_url }}
Lines	{% if license.start_line == license.end_line %} Line {{ license.start_line }} {% else %} Lines {{ license.start_line }} - {{ license.end_line }} {% endif %}
Name	{{ license.name }}	Matched Rule	{{ license.rule }}
Key	{{ license.key }}
Score	{{ license.score }}
Owner	{{ license.owner }}	Name	{{ license.name }}
Short Name	{{ license.short_name }}
Category	{{ license.category }}
SPDX Key	{{ license.spdx_license_key }}	Lines	{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}
Reference	{{ license.reference_url }}
Exception	{{ license.is_exception }}
Is Unknown	{{ license.is_unknown }}
Owner	{{ license.owner }}
Homepage	{{ license.homepage_url }}
Text URL	{{ license.text_url }}
Scancode Text URL	{{ license.scancode_text_url }}
Scancode Data URL	{{ license.scancode_data_url }}
SPDX License Key	{{ license.spdx_license_key }}
SPDX URL	{{ license.spdx_url }}
Rule	{{ license.rule }}	Key	{{ license.key }}	Score	{{ license.score }}
Name	{{ license.name }}	Short Name	{{ license.short_name }}	Category	{{ license.category }}
Line(s)	{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}	Reference	{{ license.reference_url }}	Exception	{{ license.is_exception }}
Is Unknown	{{ license.is_unknown }}	Owner	{{ license.owner }}	Homepage	{{ license.homepage_url }}
Text URL	{{ license.text_url }}	Scancode Text URL	{{ license.scancode_text_url }}	Scancode Data URL	{{ license.scancode_data_url }}
SPDX License Key	{{ license.spdx_license_key }}	SPDX URL	{{ license.spdx_url }}	Matched Rule	{{ license.matched_rule }}
Score	{{ license.score }}
Matched Line(s)	{% if license.start_line == license.end_line %} {{ license.start_line }} {% else %} {{ license.start_line }} - {{ license.end_line }} {% endif %}
Rule Identifier	{% if license.rule_text_url %} @@ -26,10 +29,15 @@
Matcher	{{ license.matcher }}
Match Coverage	{{ license.match_coverage }}
Matched Length	{{ license.matched_length }}
Key(s)	@@ -37,13 +45,22 @@ {{ key.key }} {% endfor %}
Rule Relevance	{{ license.rule_relevance }}
Rule Length	{{ license.rule_length }}