diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 359a24eb..29fa04b0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -24,4 +24,4 @@ jobs:
- run: py.test --cov .
- env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: coveralls
+ run: coveralls --service=github
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce5eb884..b916ee1d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed
+- flattening: Uses much less memory by storing data in an embedded ZODB database, streaming input with ijson, and using write-only mode in openpyxl.
- use-titles: Use $ref'erring title if available https://github.com/OpenDataServices/flatten-tool/pull/368
- create-template --no-deprecated-fields: Did not work if deprecated element at same level as a $ref https://github.com/OpenDataServices/flatten-tool/issues/185#issuecomment-719587348
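
The flattening entry above relies on streaming the input rather than loading it whole. A minimal sketch of the ijson half of that approach, assuming a root list under the default `main` key (the function name is illustrative):

```python
# Illustrative sketch: stream top-level items with ijson instead of
# json.load, so the whole input document never sits in memory at once.
import ijson


def iter_items(json_path, root_list_path="main"):
    # "<root_list_path>.item" yields each element of the root list in turn
    with open(json_path, "rb") as f:
        for item in ijson.items(f, root_list_path + ".item"):
            yield item
```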
diff --git a/examples/bods/unflatten/expected/out.json b/examples/bods/unflatten/expected/out.json
index ed3c55d0..a51b1103 100644
--- a/examples/bods/unflatten/expected/out.json
+++ b/examples/bods/unflatten/expected/out.json
@@ -1,24 +1,24 @@
[
{
- "statementID": "fbfd0547-d0c6-4a00-b559-5c5e91c34f5c",
- "interests": [
- {
- "type": "shareholding",
- "interestLevel": "direct",
- "beneficialOwnershipOrControl": true,
- "startDate": "2016-04-06",
- "share": {
- "exact": 100
- }
+ "statementID": "fbfd0547-d0c6-4a00-b559-5c5e91c34f5c",
+ "interests": [
+ {
+ "type": "shareholding",
+ "interestLevel": "direct",
+ "beneficialOwnershipOrControl": true,
+ "startDate": "2016-04-06",
+ "share": {
+ "exact": 100
}
- ],
- "statementType": "ownershipOrControlStatement",
- "statementDate": "2017-11-18",
- "subject": {
- "describedByEntityStatement": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7"
- },
- "interestedParty": {
- "describedByPersonStatement": "019a93f1-e470-42e9-957b-03559861b2e2"
}
+ ],
+ "statementType": "ownershipOrControlStatement",
+ "statementDate": "2017-11-18",
+ "subject": {
+ "describedByEntityStatement": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7"
+ },
+ "interestedParty": {
+ "describedByPersonStatement": "019a93f1-e470-42e9-957b-03559861b2e2"
}
-]
\ No newline at end of file
+}
+]
diff --git a/examples/cafe/relationship-missing-ids/expected.json b/examples/cafe/relationship-missing-ids/expected.json
index 534569ed..c7beaa11 100644
--- a/examples/cafe/relationship-missing-ids/expected.json
+++ b/examples/cafe/relationship-missing-ids/expected.json
@@ -16,6 +16,10 @@
}
]
},
+ {
+ "name": "Vegetarian Cafe",
+ "address": "42 Town Road, Bristol"
+ },
{
"id": "CAFE-VEG",
"table": [
@@ -24,10 +28,6 @@
}
]
},
- {
- "name": "Vegetarian Cafe",
- "address": "42 Town Road, Bristol"
- },
{
"table": [
{
diff --git a/examples/iati/expected.xml b/examples/iati/expected.xml
index 3677d49a..51cd0495 100644
--- a/examples/iati/expected.xml
+++ b/examples/iati/expected.xml
@@ -1,58 +1,57 @@
-
-
-
- AA-AAA-123456789-ABC123
-
- Organisation name
-
-
- A title
-
-
- A description
-
-
-
-
-
-
-
-
-
- 10
-
-
-
-
- 20
-
-
-
- AA-AAA-123456789-ABC124
-
- Organisation name
-
-
- Another title
-
-
- Another description
-
-
-
-
-
-
-
-
-
- 30
-
-
-
-
- 40
-
-
-
+
+
+ AA-AAA-123456789-ABC123
+
+ Organisation name
+
+
+ A title
+
+
+ A description
+
+
+
+
+
+
+
+
+
+ 10
+
+
+
+
+ 20
+
+
+
+ AA-AAA-123456789-ABC124
+
+ Organisation name
+
+
+ Another title
+
+
+ Another description
+
+
+
+
+
+
+
+
+
+ 30
+
+
+
+
+ 40
+
+
+
\ No newline at end of file
diff --git a/examples/iati_multilang/expected.xml b/examples/iati_multilang/expected.xml
index e0c34da0..4949f699 100644
--- a/examples/iati_multilang/expected.xml
+++ b/examples/iati_multilang/expected.xml
@@ -1,60 +1,59 @@
-
-
-
- AA-AAA-123456789-ABC123
-
- Organisation name
-
-
- A title, with comma
- Un titre
-
-
- A description
-
-
-
-
-
-
-
-
-
- 10
-
-
-
-
- 20
-
-
-
- AA-AAA-123456789-ABC124
-
- Organisation name
-
-
- Another title; with semicolon
- Un autre titre
-
-
- Another description
-
-
-
-
-
-
-
-
-
- 30
-
-
-
-
- 40
-
-
-
+
+
+ AA-AAA-123456789-ABC123
+
+ Organisation name
+
+
+ A title, with comma
+ Un titre
+
+
+ A description
+
+
+
+
+
+
+
+
+
+ 10
+
+
+
+
+ 20
+
+
+
+ AA-AAA-123456789-ABC124
+
+ Organisation name
+
+
+ Another title; with semicolon
+ Un autre titre
+
+
+ Another description
+
+
+
+
+
+
+
+
+
+ 30
+
+
+
+
+ 40
+
+
+
\ No newline at end of file
diff --git a/examples/iati_xml_comment/expected.xml b/examples/iati_xml_comment/expected.xml
index 8d131cd9..bc8305eb 100644
--- a/examples/iati_xml_comment/expected.xml
+++ b/examples/iati_xml_comment/expected.xml
@@ -1,60 +1,59 @@
-
-
-
- AA-AAA-123456789-ABC123
-
- Organisation name
-
-
- A title, with comma
- Un titre
-
-
- A description
-
-
-
-
-
-
-
-
-
- 10
-
-
-
-
- 20
-
-
-
- AA-AAA-123456789-ABC124
-
- Organisation name
-
-
- Another title; with semicolon
- Un autre titre
-
-
- Another description
-
-
-
-
-
-
-
-
-
- 30
-
-
-
-
- 40
-
-
-
+
+
+ AA-AAA-123456789-ABC123
+
+ Organisation name
+
+
+ A title, with comma
+ Un titre
+
+
+ A description
+
+
+
+
+
+
+
+
+
+ 10
+
+
+
+
+ 20
+
+
+
+ AA-AAA-123456789-ABC124
+
+ Organisation name
+
+
+ Another title; with semicolon
+ Un autre titre
+
+
+ Another description
+
+
+
+
+
+
+
+
+
+ 30
+
+
+
+
+ 40
+
+
+
\ No newline at end of file
diff --git a/examples/receipt/source-map/expected/cell_source_map.json b/examples/receipt/source-map/expected/cell_source_map.json
index e4ac98cc..fa16d4ff 100644
--- a/examples/receipt/source-map/expected/cell_source_map.json
+++ b/examples/receipt/source-map/expected/cell_source_map.json
@@ -1,374 +1,374 @@
{
"cafe/0/id": [
- [
- "1_cafes",
- "A",
- 2,
- "id"
- ],
- [
- "2_tables",
- "A",
- 2,
- "id"
- ],
- [
- "2_tables",
- "A",
- 3,
- "id"
- ],
- [
- "2_tables",
- "A",
- 4,
- "id"
- ],
- [
- "3_dishes",
- "A",
- 2,
- "id"
- ],
- [
- "3_dishes",
- "A",
- 3,
- "id"
- ],
- [
- "3_dishes",
- "A",
- 4,
- "id"
- ]
- ],
+ [
+ "1_cafes",
+ "A",
+ 2,
+ "id"
+ ],
+ [
+ "2_tables",
+ "A",
+ 2,
+ "id"
+ ],
+ [
+ "2_tables",
+ "A",
+ 3,
+ "id"
+ ],
+ [
+ "2_tables",
+ "A",
+ 4,
+ "id"
+ ],
+ [
+ "3_dishes",
+ "A",
+ 2,
+ "id"
+ ],
+ [
+ "3_dishes",
+ "A",
+ 3,
+ "id"
+ ],
+ [
+ "3_dishes",
+ "A",
+ 4,
+ "id"
+ ]
+],
"cafe/0/name": [
- [
- "1_cafes",
- "B",
- 2,
- "name"
- ]
- ],
+ [
+ "1_cafes",
+ "B",
+ 2,
+ "name"
+ ]
+],
"cafe/0/table/0/dish/0/cost": [
- [
- "3_dishes",
- "D",
- 2,
- "table/0/dish/0/cost"
- ]
- ],
+ [
+ "3_dishes",
+ "D",
+ 2,
+ "table/0/dish/0/cost"
+ ]
+],
"cafe/0/table/0/dish/0/name": [
- [
- "3_dishes",
- "C",
- 2,
- "table/0/dish/0/name"
- ]
- ],
+ [
+ "3_dishes",
+ "C",
+ 2,
+ "table/0/dish/0/name"
+ ]
+],
"cafe/0/table/0/dish/1/cost": [
- [
- "3_dishes",
- "D",
- 3,
- "table/0/dish/0/cost"
- ]
- ],
+ [
+ "3_dishes",
+ "D",
+ 3,
+ "table/0/dish/0/cost"
+ ]
+],
"cafe/0/table/0/dish/1/name": [
- [
- "3_dishes",
- "C",
- 3,
- "table/0/dish/0/name"
- ]
- ],
+ [
+ "3_dishes",
+ "C",
+ 3,
+ "table/0/dish/0/name"
+ ]
+],
"cafe/0/table/0/id": [
- [
- "2_tables",
- "B",
- 2,
- "table/0/id"
- ],
- [
- "3_dishes",
- "B",
- 2,
- "table/0/id"
- ],
- [
- "3_dishes",
- "B",
- 3,
- "table/0/id"
- ]
- ],
+ [
+ "2_tables",
+ "B",
+ 2,
+ "table/0/id"
+ ],
+ [
+ "3_dishes",
+ "B",
+ 2,
+ "table/0/id"
+ ],
+ [
+ "3_dishes",
+ "B",
+ 3,
+ "table/0/id"
+ ]
+],
"cafe/0/table/0/number": [
- [
- "2_tables",
- "C",
- 2,
- "table/0/number"
- ]
- ],
+ [
+ "2_tables",
+ "C",
+ 2,
+ "table/0/number"
+ ]
+],
"cafe/0/table/1/id": [
- [
- "2_tables",
- "B",
- 3,
- "table/0/id"
- ]
- ],
+ [
+ "2_tables",
+ "B",
+ 3,
+ "table/0/id"
+ ]
+],
"cafe/0/table/1/number": [
- [
- "2_tables",
- "C",
- 3,
- "table/0/number"
- ]
- ],
+ [
+ "2_tables",
+ "C",
+ 3,
+ "table/0/number"
+ ]
+],
"cafe/0/table/2/dish/0/cost": [
- [
- "3_dishes",
- "D",
- 4,
- "table/0/dish/0/cost"
- ]
- ],
+ [
+ "3_dishes",
+ "D",
+ 4,
+ "table/0/dish/0/cost"
+ ]
+],
"cafe/0/table/2/dish/0/name": [
- [
- "3_dishes",
- "C",
- 4,
- "table/0/dish/0/name"
- ]
- ],
+ [
+ "3_dishes",
+ "C",
+ 4,
+ "table/0/dish/0/name"
+ ]
+],
"cafe/0/table/2/id": [
- [
- "2_tables",
- "B",
- 4,
- "table/0/id"
- ],
- [
- "3_dishes",
- "B",
- 4,
- "table/0/id"
- ]
- ],
+ [
+ "2_tables",
+ "B",
+ 4,
+ "table/0/id"
+ ],
+ [
+ "3_dishes",
+ "B",
+ 4,
+ "table/0/id"
+ ]
+],
"cafe/0/table/2/number": [
- [
- "2_tables",
- "C",
- 4,
- "table/0/number"
- ]
- ],
- "cafe/1/id": [
- [
- "1_cafes",
- "A",
- 3,
- "id"
- ],
- [
- "2_tables",
- "A",
- 5,
- "id"
- ],
- [
- "2_tables",
- "A",
- 6,
- "id"
- ],
- [
- "3_dishes",
- "A",
- 5,
- "id"
- ]
- ],
- "cafe/1/name": [
- [
- "1_cafes",
- "B",
- 3,
- "name"
- ]
+ [
+ "2_tables",
+ "C",
+ 4,
+ "table/0/number"
+ ]
+],
+ "cafe/0": [
+ [
+ "1_cafes",
+ 2
],
- "cafe/1/table/0/dish/0/cost": [
- [
- "3_dishes",
- "D",
- 5,
- "table/0/dish/0/cost"
- ]
+ [
+ "2_tables",
+ 2
],
- "cafe/1/table/0/dish/0/name": [
- [
- "3_dishes",
- "C",
- 5,
- "table/0/dish/0/name"
- ]
+ [
+ "2_tables",
+ 3
],
- "cafe/1/table/0/id": [
- [
- "2_tables",
- "B",
- 5,
- "table/0/id"
- ],
- [
- "3_dishes",
- "B",
- 5,
- "table/0/id"
- ]
+ [
+ "2_tables",
+ 4
],
- "cafe/1/table/0/number": [
- [
- "2_tables",
- "C",
- 5,
- "table/0/number"
- ]
+ [
+ "3_dishes",
+ 2
],
- "cafe/1/table/1/id": [
- [
- "2_tables",
- "B",
- 6,
- "table/0/id"
- ]
- ],
- "cafe/1/table/1/number": [
- [
- "2_tables",
- "C",
- 6,
- "table/0/number"
- ]
- ],
- "cafe/0": [
- [
- "1_cafes",
- 2
- ],
- [
- "2_tables",
- 2
- ],
- [
- "2_tables",
- 3
- ],
- [
- "2_tables",
- 4
- ],
- [
- "3_dishes",
- 2
- ],
- [
- "3_dishes",
- 3
- ],
- [
- "3_dishes",
- 4
- ]
+ [
+ "3_dishes",
+ 3
],
+ [
+ "3_dishes",
+ 4
+ ]
+],
"cafe/0/table/0/dish/0": [
- [
- "3_dishes",
- 2
- ]
- ],
+ [
+ "3_dishes",
+ 2
+ ]
+],
"cafe/0/table/0/dish/1": [
- [
- "3_dishes",
- 3
- ]
- ],
+ [
+ "3_dishes",
+ 3
+ ]
+],
"cafe/0/table/0": [
- [
- "2_tables",
- 2
- ],
- [
- "3_dishes",
- 2
- ],
- [
- "3_dishes",
- 3
- ]
+ [
+ "2_tables",
+ 2
],
- "cafe/0/table/1": [
- [
- "2_tables",
- 3
- ]
+ [
+ "3_dishes",
+ 2
],
+ [
+ "3_dishes",
+ 3
+ ]
+],
+ "cafe/0/table/1": [
+ [
+ "2_tables",
+ 3
+ ]
+],
"cafe/0/table/2/dish/0": [
- [
- "3_dishes",
- 4
- ]
- ],
+ [
+ "3_dishes",
+ 4
+ ]
+],
"cafe/0/table/2": [
- [
- "2_tables",
- 4
- ],
- [
- "3_dishes",
- 4
- ]
+ [
+ "2_tables",
+ 4
],
+ [
+ "3_dishes",
+ 4
+ ]
+],
+ "cafe/1/id": [
+ [
+ "1_cafes",
+ "A",
+ 3,
+ "id"
+ ],
+ [
+ "2_tables",
+ "A",
+ 5,
+ "id"
+ ],
+ [
+ "2_tables",
+ "A",
+ 6,
+ "id"
+ ],
+ [
+ "3_dishes",
+ "A",
+ 5,
+ "id"
+ ]
+],
+ "cafe/1/name": [
+ [
+ "1_cafes",
+ "B",
+ 3,
+ "name"
+ ]
+],
+ "cafe/1/table/0/dish/0/cost": [
+ [
+ "3_dishes",
+ "D",
+ 5,
+ "table/0/dish/0/cost"
+ ]
+],
+ "cafe/1/table/0/dish/0/name": [
+ [
+ "3_dishes",
+ "C",
+ 5,
+ "table/0/dish/0/name"
+ ]
+],
+ "cafe/1/table/0/id": [
+ [
+ "2_tables",
+ "B",
+ 5,
+ "table/0/id"
+ ],
+ [
+ "3_dishes",
+ "B",
+ 5,
+ "table/0/id"
+ ]
+],
+ "cafe/1/table/0/number": [
+ [
+ "2_tables",
+ "C",
+ 5,
+ "table/0/number"
+ ]
+],
+ "cafe/1/table/1/id": [
+ [
+ "2_tables",
+ "B",
+ 6,
+ "table/0/id"
+ ]
+],
+ "cafe/1/table/1/number": [
+ [
+ "2_tables",
+ "C",
+ 6,
+ "table/0/number"
+ ]
+],
"cafe/1": [
- [
- "1_cafes",
- 3
- ],
- [
- "2_tables",
- 5
- ],
- [
- "2_tables",
- 6
- ],
- [
- "3_dishes",
- 5
- ]
+ [
+ "1_cafes",
+ 3
],
- "cafe/1/table/0/dish/0": [
- [
- "3_dishes",
- 5
- ]
+ [
+ "2_tables",
+ 5
+ ],
+ [
+ "2_tables",
+ 6
],
+ [
+ "3_dishes",
+ 5
+ ]
+],
+ "cafe/1/table/0/dish/0": [
+ [
+ "3_dishes",
+ 5
+ ]
+],
"cafe/1/table/0": [
- [
- "2_tables",
- 5
- ],
- [
- "3_dishes",
- 5
- ]
+ [
+ "2_tables",
+ 5
],
+ [
+ "3_dishes",
+ 5
+ ]
+],
"cafe/1/table/1": [
- [
- "2_tables",
- 6
- ]
+ [
+ "2_tables",
+ 6
]
+]
}
\ No newline at end of file
diff --git a/flatten-tool b/flatten-tool
index 328a7b7d..df6fda33 100755
--- a/flatten-tool
+++ b/flatten-tool
@@ -1,3 +1,4 @@
#!/usr/bin/env python
import flattentool.cli
+
flattentool.cli.main()
diff --git a/flattentool/__init__.py b/flattentool/__init__.py
index 5c4f4bbf..4486fedb 100644
--- a/flattentool/__init__.py
+++ b/flattentool/__init__.py
@@ -1,16 +1,24 @@
import codecs
import json
+import os
import sys
+import tempfile
+import uuid
from collections import OrderedDict
from decimal import Decimal
+import jsonstreams
+import lxml.etree
+import zc.zlibstorage
+import ZODB.FileStorage
+
from flattentool.input import FORMATS as INPUT_FORMATS
from flattentool.json_input import JSONParser
from flattentool.lib import parse_sheet_configuration
from flattentool.output import FORMATS as OUTPUT_FORMATS
from flattentool.output import FORMATS_SUFFIX
from flattentool.schema import SchemaParser
-from flattentool.xml_output import toxml
+from flattentool.xml_output import generate_schema_dict, write_comment, xml_item
def create_template(
@@ -112,7 +120,7 @@ def flatten(
else:
schema_parser = None
- parser = JSONParser(
+ with JSONParser(
json_filename=input_name,
root_list_path=None if root_is_list else root_list_path,
schema_parser=schema_parser,
@@ -126,33 +134,33 @@ def flatten(
preserve_fields=preserve_fields,
remove_empty_schema_columns=remove_empty_schema_columns,
truncation_length=truncation_length,
- )
- parser.parse()
-
- def spreadsheet_output(spreadsheet_output_class, name):
- spreadsheet_output = spreadsheet_output_class(
- parser=parser,
- main_sheet_name=main_sheet_name,
- output_name=name,
- sheet_prefix=sheet_prefix,
- )
- spreadsheet_output.write_sheets()
-
- if output_format == "all":
- if not output_name:
- output_name = "flattened"
- for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
- spreadsheet_output(
- spreadsheet_output_class, output_name + FORMATS_SUFFIX[format_name]
+ persist=True,
+ ) as parser:
+
+ def spreadsheet_output(spreadsheet_output_class, name):
+ spreadsheet_output = spreadsheet_output_class(
+ parser=parser,
+ main_sheet_name=main_sheet_name,
+ output_name=name,
+ sheet_prefix=sheet_prefix,
)
+ spreadsheet_output.write_sheets()
+
+ if output_format == "all":
+ if not output_name:
+ output_name = "flattened"
+ for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
+ spreadsheet_output(
+ spreadsheet_output_class, output_name + FORMATS_SUFFIX[format_name]
+ )
- elif output_format in OUTPUT_FORMATS.keys(): # in dictionary of allowed formats
- if not output_name:
- output_name = "flattened" + FORMATS_SUFFIX[output_format]
- spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
+ elif output_format in OUTPUT_FORMATS.keys(): # in dictionary of allowed formats
+ if not output_name:
+ output_name = "flattened" + FORMATS_SUFFIX[output_format]
+ spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
- else:
- raise Exception("The requested format is not available")
+ else:
+ raise Exception("The requested format is not available")
# From http://bugs.python.org/issue16535
@@ -179,7 +187,103 @@ def decimal_default(o):
raise TypeError(repr(o) + " is not JSON serializable")
+# This is just to make sure ensure_ascii and default are correct for the streaming library
+class CustomJSONEncoder(json.JSONEncoder):
+ def __init__(self, **kw):
+ super().__init__(**kw)
+ # overwrite these no matter what is passed to __init__
+ self.ensure_ascii = False
+ self.default = decimal_default
+
+
+def get_output(output_name, xml=False):
+ if not output_name:
+ if xml:
+ return sys.stdout.buffer
+ else:
+ return sys.stdout
+ if xml:
+ return codecs.open(output_name, "wb")
+ return codecs.open(output_name, "w", encoding="utf-8")
+
+
def unflatten(
+ input_name,
+ output_name=None,
+ cell_source_map=None,
+ root_is_list=False,
+ xml=False,
+ **kw
+):
+ unflatten_kw = {
+ "output_name": output_name,
+ "cell_source_map": cell_source_map,
+ "root_is_list": root_is_list,
+ "xml": xml,
+ }
+ unflatten_kw.update(kw)
+
+ zodb_db_location = tempfile.gettempdir() + "/flattentool-" + str(uuid.uuid4())
+ zodb_storage = zc.zlibstorage.ZlibStorage(
+ ZODB.FileStorage.FileStorage(zodb_db_location)
+ )
+ db = ZODB.DB(zodb_storage)
+ unflatten_kw["db"] = db
+
+ try:
+ if xml:
+ with get_output(output_name, xml=True) as xml_file, lxml.etree.xmlfile(
+ xml_file, encoding="utf-8"
+ ) as xml_stream:
+ unflatten_kw["xml_stream"] = xml_stream
+ if cell_source_map:
+ with get_output(
+ cell_source_map
+ ) as cell_source_map_file, jsonstreams.Stream(
+ jsonstreams.Type.object,
+ fd=cell_source_map_file,
+ indent=4,
+ encoder=CustomJSONEncoder,
+ ) as cell_source_map_stream:
+ unflatten_kw["cell_source_map_stream"] = cell_source_map_stream
+ _unflatten(input_name, **unflatten_kw)
+ else:
+ _unflatten(input_name, **unflatten_kw)
+
+ else:
+ json_stream_args = {"indent": 4, "encoder": CustomJSONEncoder}
+ if root_is_list:
+ json_stream_args["jtype"] = jsonstreams.Type.array
+ else:
+ json_stream_args["jtype"] = jsonstreams.Type.object
+
+ with get_output(output_name) as json_file, jsonstreams.Stream(
+ fd=json_file, **json_stream_args
+ ) as json_stream:
+ unflatten_kw["json_stream"] = json_stream
+ if cell_source_map:
+ with get_output(
+ cell_source_map
+ ) as cell_source_map_file, jsonstreams.Stream(
+ jsonstreams.Type.object,
+ fd=cell_source_map_file,
+ indent=4,
+ encoder=CustomJSONEncoder,
+ ) as cell_source_map_stream:
+ unflatten_kw["cell_source_map_stream"] = cell_source_map_stream
+ _unflatten(input_name, **unflatten_kw)
+ else:
+ _unflatten(input_name, **unflatten_kw)
+
+ finally:
+ db.close()
+ os.remove(zodb_db_location)
+ os.remove(zodb_db_location + ".lock")
+ os.remove(zodb_db_location + ".index")
+ os.remove(zodb_db_location + ".tmp")
+
+
+def _unflatten(
input_name,
base_json=None,
input_format=None,
@@ -205,6 +309,10 @@ def unflatten(
disable_local_refs=False,
xml_comment=None,
truncation_length=3,
+ json_stream=None,
+ cell_source_map_stream=None,
+ xml_stream=None,
+ db=None,
**_
):
"""
@@ -218,20 +326,18 @@ def unflatten(
if metatab_name and base_json:
raise Exception("Not allowed to use base_json with metatab")
- if root_is_list:
- base = None
- elif base_json:
+ if not root_is_list and base_json:
with open(base_json) as fp:
base = json.load(fp, object_pairs_hook=OrderedDict)
- else:
- base = OrderedDict()
+ for key, value in base.items():
+ json_stream.write(key, value)
base_configuration = parse_sheet_configuration(
[item.strip() for item in default_configuration.split(",")]
)
- cell_source_map_data = OrderedDict()
heading_source_map_data = OrderedDict()
+ meta_result = None
if metatab_name and not root_is_list:
spreadsheet_input_class = INPUT_FORMATS[input_format]
@@ -255,7 +361,7 @@ def unflatten(
spreadsheet_input.encoding = encoding
spreadsheet_input.read_sheets()
(
- result,
+ meta_result,
cell_source_map_data_meta,
heading_source_map_data_meta,
) = spreadsheet_input.fancy_unflatten(
@@ -264,7 +370,9 @@ def unflatten(
)
for key, value in (cell_source_map_data_meta or {}).items():
## strip off meta/0/ from start of source map as actually data is at top level
- cell_source_map_data[key[7:]] = value
+ if cell_source_map_stream:
+ cell_source_map_stream.write(key[7:], value)
+
for key, value in (heading_source_map_data_meta or {}).items():
## strip off meta/ from start of source map as actually data is at top level
heading_source_map_data[key[5:]] = value
@@ -274,9 +382,6 @@ def unflatten(
spreadsheet_input.sheet_configuration.get(metatab_name, {})
)
- if result:
- base.update(result[0])
-
if root_list_path is None:
root_list_path = base_configuration.get("RootListPath", "main")
if id_name is None:
@@ -309,54 +414,85 @@ def unflatten(
spreadsheet_input.parser = parser
spreadsheet_input.encoding = encoding
spreadsheet_input.read_sheets()
- (
- result,
- cell_source_map_data_main,
- heading_source_map_data_main,
- ) = spreadsheet_input.fancy_unflatten(
- with_cell_source_map=cell_source_map,
- with_heading_source_map=heading_source_map,
- )
- cell_source_map_data.update(cell_source_map_data_main or {})
- heading_source_map_data.update(heading_source_map_data_main or {})
- if root_is_list:
- base = list(result)
- else:
- base[root_list_path] = list(result)
if xml:
xml_root_tag = base_configuration.get("XMLRootTag", "iati-activities")
- xml_output = toxml(
- base,
- xml_root_tag,
- xml_schemas=xml_schemas,
- root_list_path=root_list_path,
- xml_comment=xml_comment,
- )
- if output_name is None:
- sys.stdout.buffer.write(xml_output)
- else:
- with codecs.open(output_name, "wb") as fp:
- fp.write(xml_output)
- else:
- if output_name is None:
- print(
- json.dumps(base, indent=4, default=decimal_default, ensure_ascii=False)
- )
- else:
- with codecs.open(output_name, "w", encoding="utf-8") as fp:
- json.dump(
- base, fp, indent=4, default=decimal_default, ensure_ascii=False
- )
- if cell_source_map:
- with codecs.open(cell_source_map, "w", encoding="utf-8") as fp:
- json.dump(
- cell_source_map_data,
- fp,
- indent=4,
- default=decimal_default,
- ensure_ascii=False,
- )
+
+ if not metatab_only:
+ xml_stream.write_declaration()
+ with xml_stream.element(xml_root_tag):
+ write_comment(xml_stream, xml_comment)
+
+ for (
+ single_result,
+ cell_source_map_data_main,
+ heading_source_map_data_main,
+ ) in spreadsheet_input.unflatten_with_storage(
+ with_cell_source_map=cell_source_map,
+ with_heading_source_map=heading_source_map,
+ db=db,
+ ):
+
+ schema_dict = None
+ if xml_schemas:
+ schema_dict = generate_schema_dict(xml_schemas, root_list_path)
+
+ for item in single_result:
+ xml_item(xml_stream, item, root_list_path, schema_dict)
+
+ if cell_source_map_stream and cell_source_map_data_main:
+ for key, value in cell_source_map_data_main.items():
+ cell_source_map_stream.write(key, value)
+
+ for key, value in (heading_source_map_data_main or {}).items():
+ if key in heading_source_map_data:
+ for item in heading_source_map_data_main[key]:
+ if item not in heading_source_map_data[key]:
+ heading_source_map_data[key].append(item)
+ else:
+ heading_source_map_data[key] = heading_source_map_data_main[
+ key
+ ]
+
+ if not xml:
+ if meta_result:
+ for key, value in meta_result[0].items():
+ json_stream.write(key, value)
+
+ if not metatab_only:
+ if not root_is_list:
+ list_stream = json_stream.subarray(root_list_path)
+ else:
+ list_stream = json_stream
+
+ for (
+ single_result,
+ cell_source_map_data_main,
+ heading_source_map_data_main,
+ ) in spreadsheet_input.unflatten_with_storage(
+ with_cell_source_map=cell_source_map,
+ with_heading_source_map=heading_source_map,
+ db=db,
+ ):
+
+ if cell_source_map_stream and cell_source_map_data_main:
+ for key, value in cell_source_map_data_main.items():
+ cell_source_map_stream.write(key, value)
+
+ for item in single_result:
+ list_stream.write(item)
+
+ for key, value in (heading_source_map_data_main or {}).items():
+ if key in heading_source_map_data:
+ for item in heading_source_map_data_main[key]:
+ if item not in heading_source_map_data[key]:
+ heading_source_map_data[key].append(item)
+ else:
+ heading_source_map_data[key] = heading_source_map_data_main[key]
+
+ if not root_is_list:
+ list_stream.close()
+
if heading_source_map:
with codecs.open(heading_source_map, "w", encoding="utf-8") as fp:
json.dump(
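
The reworked `unflatten` above pairs a throwaway on-disk ZODB with streaming writers and guarantees cleanup in `finally`. A compressed, illustrative sketch of that lifecycle (the output file name is hypothetical), using the same `jsonstreams` calls as the code:

```python
# Sketch of the pattern above: a temporary zlib-compressed ZODB on disk,
# streamed JSON output via jsonstreams, and cleanup of the side files.
import os
import tempfile
import uuid

import jsonstreams
import zc.zlibstorage
import ZODB
import ZODB.FileStorage

db_location = os.path.join(tempfile.gettempdir(), "flattentool-" + str(uuid.uuid4()))
storage = zc.zlibstorage.ZlibStorage(ZODB.FileStorage.FileStorage(db_location))
db = ZODB.DB(storage)
try:
    with jsonstreams.Stream(jsonstreams.Type.object, filename="out.json", indent=4) as out:
        with out.subarray("main") as main:  # the root list is streamed item by item
            main.write({"id": 1})
finally:
    db.close()
    for suffix in ("", ".lock", ".index", ".tmp"):
        os.remove(db_location + suffix)
```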
diff --git a/flattentool/input.py b/flattentool/input.py
index 62d59451..d09d7948 100644
--- a/flattentool/input.py
+++ b/flattentool/input.py
@@ -7,14 +7,19 @@
import datetime
import os
+import uuid
from collections import OrderedDict, UserDict
from csv import DictReader
from csv import reader as csvreader
from decimal import Decimal, InvalidOperation
from warnings import warn
+import BTrees
import openpyxl
+import persistent.list
import pytz
+import transaction
+import ZODB
from openpyxl.utils.cell import _get_column_letter
from flattentool.exceptions import DataErrorWarning
@@ -258,6 +263,7 @@ def __init__(
xml=False,
base_configuration={},
use_configuration=True,
+ persist=False,
):
self.input_name = input_name
self.root_list_path = root_list_path
@@ -275,6 +281,8 @@ def __init__(
self.base_configuration = base_configuration or {}
self.sheet_configuration = {}
self.use_configuration = use_configuration
+ self.persist = persist
+ self.actual_headings = {}
def get_sub_sheets_lines(self):
for sub_sheet_name in self.sub_sheet_names:
@@ -306,13 +314,20 @@ def get_sheet_headings(self, sheet_name):
def read_sheets(self):
raise NotImplementedError
- def do_unflatten(self):
+ def do_unflatten(self, sheet_lines=None):
main_sheet_by_ocid = OrderedDict()
- sheets = list(self.get_sub_sheets_lines())
+ if sheet_lines:
+ sheets = sheet_lines.items()
+ else:
+ sheets = list(self.get_sub_sheets_lines())
for i, sheet in enumerate(sheets):
sheet_name, lines = sheet
try:
- actual_headings = self.get_sheet_headings(sheet_name)
+ # cache headings
+ actual_headings = self.actual_headings.get(sheet_name)
+ if not actual_headings:
+ actual_headings = self.get_sheet_headings(sheet_name)
+ self.actual_headings[sheet_name] = actual_headings
# If sheet is empty or too many lines have been skipped
if not actual_headings:
continue
@@ -384,7 +399,14 @@ def do_unflatten(self):
except NotImplementedError:
# The ListInput type used in the tests doesn't support getting headings.
actual_headings = None
- for j, line in enumerate(lines):
+
+ if not sheet_lines:
+ lines_generator = enumerate(lines)
+ else:
+ # when sheet lines are supplied, the sheet row numbers come from the dictionary keys
+ lines_generator = sorted(list(lines.items()))
+
+ for j, line in lines_generator:
if all(x is None or x == "" for x in line.values()):
# if all(x == '' for x in line.values()):
continue
@@ -452,14 +474,77 @@ def unflatten(self):
result = extract_list_to_value(result)
return result
- def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
- cell_tree = self.do_unflatten()
+ def unflatten_with_storage(
+ self, with_cell_source_map, with_heading_source_map, db=None
+ ):
+
+ if not db:
+ # If None, in-memory storage is used.
+ db = ZODB.DB(None)
+
+ self.connection = db.open()
+ root = self.connection.root
+
+ # Each top-level object is assigned an integer. This way we preserve ordering as much as possible
+ root.object_store = BTrees.IOBTree.BTree()
+
+ # This maps the top-level id field to its integer index.
+ root.object_index = BTrees.OIBTree.BTree()
+
+ index = 0
+
+ for sheet, rows in self.get_sub_sheets_lines():
+ for row_number, row in enumerate(rows):
+
+ ## Use a uuid to avoid clashing with an existing key for objects with no id
+ top_level_id = row.get(self.id_name) or str(uuid.uuid4())
+
+ current_index = root.object_index.get(top_level_id)
+
+ if current_index is None:
+ current_index = index
+ root.object_index[top_level_id] = current_index
+ root.object_store[current_index] = persistent.list.PersistentList()
+ index += 1
+
+ root.object_store[current_index].append((sheet, row_number, row))
+
+ if row_number != 0 and row_number % 1000 == 0:
+ transaction.commit()
+ self.connection.cacheMinimize()
+
+ transaction.commit()
+
+ self.connection.cacheMinimize()
+
+ for current_index, row_list in root.object_store.items():
+ sheet_lines = OrderedDict()
+ for sheet, row_number, row in row_list:
+ if sheet not in sheet_lines:
+ sheet_lines[sheet] = OrderedDict()
+ sheet_lines[sheet][row_number] = row
+ yield self.fancy_unflatten(
+ with_cell_source_map, with_heading_source_map, sheet_lines, current_index
+ )
+
+ self.connection.cacheMinimize()
+
+ def fancy_unflatten(
+ self,
+ with_cell_source_map,
+ with_heading_source_map,
+ sheet_lines=None,
+ index=None,
+ ):
+ cell_tree = self.do_unflatten(sheet_lines=sheet_lines)
result = extract_list_to_value(cell_tree)
ordered_cell_source_map = None
heading_source_map = None
if with_cell_source_map or with_heading_source_map:
cell_source_map = extract_list_to_error_path(
- [] if self.root_is_list else [self.root_list_path], cell_tree
+ [] if self.root_is_list else [self.root_list_path],
+ cell_tree,
+ index=index,
)
ordered_items = sorted(cell_source_map.items())
row_source_map = OrderedDict()
@@ -502,10 +587,12 @@ def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
return result, ordered_cell_source_map, heading_source_map
-def extract_list_to_error_path(path, input):
+def extract_list_to_error_path(path, input, index=None):
output = {}
+ if index is not None:
+ assert len(input) <= 1
for i, item in enumerate(input):
- res = extract_dict_to_error_path(path + [i], item)
+ res = extract_dict_to_error_path(path + [i if index is None else index], item)
for p in res:
assert p not in output, _("Already have key {}").format(p)
output[p] = res[p]
@@ -652,7 +739,14 @@ class BadXLSXZipFile(BadZipFile):
class XLSXInput(SpreadsheetInput):
def read_sheets(self):
try:
- self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
+ if self.vertical_orientation:
+ # read_only mode only works when reading rows not columns
+ self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
+ else:
+ self.workbook = openpyxl.load_workbook(
+ self.input_name, data_only=True, read_only=True
+ )
+
except BadZipFile as e: # noqa
# TODO when we have python3 only add 'from e' to show exception chain
raise BadXLSXZipFile(
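
`unflatten_with_storage` above buckets every sheet row under its top-level object before unflattening one object at a time, which is what bounds memory use. A self-contained sketch of that bucketing with illustrative data (the real code also commits every 1000 rows and shrinks the ZODB cache):

```python
# Illustrative sketch of the grouping in unflatten_with_storage: rows from
# all sheets are bucketed by top-level id, preserving first-seen order.
import uuid

import BTrees.IOBTree
import BTrees.OIBTree
import persistent.list
import transaction
import ZODB

root = ZODB.DB(None).open().root             # None -> in-memory storage
root.object_store = BTrees.IOBTree.BTree()   # int index -> rows for one object
root.object_index = BTrees.OIBTree.BTree()   # top-level id -> int index

rows = [("main", 0, {"id": "A", "x": 1}), ("main", 1, {"id": "A", "x": 2})]
next_index = 0
for sheet, row_number, row in rows:
    top_level_id = row.get("id") or str(uuid.uuid4())  # uuid avoids key clashes
    index = root.object_index.get(top_level_id)
    if index is None:
        index = next_index
        root.object_index[top_level_id] = index
        root.object_store[index] = persistent.list.PersistentList()
        next_index += 1
    root.object_store[index].append((sheet, row_number, row))
transaction.commit()
print(len(root.object_store))  # -> 1: both rows belong to object "A"
```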
diff --git a/flattentool/json_input.py b/flattentool/json_input.py
index fa9634d8..79567c0e 100644
--- a/flattentool/json_input.py
+++ b/flattentool/json_input.py
@@ -7,18 +7,24 @@
import codecs
import copy
-import json
import os
+import tempfile
+import uuid
from collections import OrderedDict
from decimal import Decimal
from warnings import warn
+import BTrees.OOBTree
+import ijson
+import transaction
import xmltodict
+import zc.zlibstorage
+import ZODB.FileStorage
from flattentool.i18n import _
from flattentool.input import path_search
from flattentool.schema import make_sub_sheet_name
-from flattentool.sheet import Sheet
+from flattentool.sheet import PersistentSheet
BASIC_TYPES = [str, bool, int, Decimal, type(None)]
@@ -112,9 +118,26 @@ def __init__(
remove_empty_schema_columns=False,
rollup=False,
truncation_length=3,
+ persist=False,
):
+ if persist:
+ self.zodb_db_location = (
+ tempfile.gettempdir() + "/flattentool-" + str(uuid.uuid4())
+ )
+ zodb_storage = zc.zlibstorage.ZlibStorage(
+ ZODB.FileStorage.FileStorage(self.zodb_db_location)
+ )
+ self.db = ZODB.DB(zodb_storage)
+ else:
+ # If None, in-memory storage is used.
+ self.db = ZODB.DB(None)
+
+ self.connection = self.db.open()
+ root = self.connection.root
+ root.sheet_store = BTrees.OOBTree.BTree()
+
self.sub_sheets = {}
- self.main_sheet = Sheet()
+ self.main_sheet = PersistentSheet(connection=self.connection, name="")
self.root_list_path = root_list_path
self.root_id = root_id
self.use_titles = use_titles
@@ -125,9 +148,17 @@ def __init__(
self.filter_value = filter_value
self.remove_empty_schema_columns = remove_empty_schema_columns
self.seen_paths = set()
+ self.persist = persist
if schema_parser:
- self.main_sheet = copy.deepcopy(schema_parser.main_sheet)
+ self.main_sheet = PersistentSheet.from_sheet(
+ schema_parser.main_sheet, self.connection
+ )
+ for sheet_name, sheet in list(self.sub_sheets.items()):
+ self.sub_sheets[sheet_name] = PersistentSheet.from_sheet(
+ sheet, self.connection
+ )
+
self.sub_sheets = copy.deepcopy(schema_parser.sub_sheets)
if remove_empty_schema_columns:
# Don't use columns from the schema parser
@@ -194,18 +225,13 @@ def __init__(
_("Only one of json_file or root_json_dict should be supplied")
)
- if json_filename:
- with codecs.open(json_filename, encoding="utf-8") as json_file:
- try:
- self.root_json_dict = json.load(
- json_file, object_pairs_hook=OrderedDict, parse_float=Decimal
- )
- except UnicodeError as err:
- raise BadlyFormedJSONErrorUTF8(*err.args)
- except ValueError as err:
- raise BadlyFormedJSONError(*err.args)
- else:
- self.root_json_dict = root_json_dict
+ if not json_filename:
+ if self.root_list_path is None:
+ self.root_json_list = root_json_dict
+ else:
+ self.root_json_list = path_search(
+ root_json_dict, self.root_list_path.split("/")
+ )
if preserve_fields:
# Extract fields to be preserved from input file (one path per line)
@@ -240,19 +266,37 @@ def __init__(
self.preserve_fields = None
self.preserve_fields_input = None
+ if json_filename:
+ if self.root_list_path is None:
+ path = "item"
+ else:
+ path = root_list_path.replace("/", ".") + ".item"
+
+ json_file = codecs.open(json_filename, encoding="utf-8")
+
+ self.root_json_list = ijson.items(json_file, path, map_type=OrderedDict)
+
+ try:
+ self.parse()
+ except ijson.common.IncompleteJSONError as err:
+ raise BadlyFormedJSONError(*err.args)
+ except UnicodeDecodeError as err:
+ raise BadlyFormedJSONErrorUTF8(*err.args)
+ finally:
+ if json_filename:
+ json_file.close()
+
def parse(self):
- if self.root_list_path is None:
- root_json_list = self.root_json_dict
- else:
- root_json_list = path_search(
- self.root_json_dict, self.root_list_path.split("/")
- )
- for json_dict in root_json_list:
+ for num, json_dict in enumerate(self.root_json_list):
if json_dict is None:
# This is particularly useful for IATI XML, in order to not
# fall over on empty activity, e.g.
continue
self.parse_json_dict(json_dict, sheet=self.main_sheet)
+ if num % 2000 == 0 and num != 0:
+ transaction.commit()
+
+ transaction.commit()
if self.remove_empty_schema_columns:
# Remove sheets with no lines of data
@@ -501,7 +545,9 @@ def parse_json_dict(
parent_name, key, truncation_length=self.truncation_length
)
if sub_sheet_name not in self.sub_sheets:
- self.sub_sheets[sub_sheet_name] = Sheet(name=sub_sheet_name)
+ self.sub_sheets[sub_sheet_name] = PersistentSheet(
+ name=sub_sheet_name, connection=self.connection
+ )
for json_dict in value:
if json_dict is None:
@@ -518,4 +564,16 @@ def parse_json_dict(
raise ValueError(_("Unsupported type {}").format(type(value)))
if top:
- sheet.lines.append(flattened_dict)
+ sheet.append_line(flattened_dict)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ if self.persist:
+ self.connection.close()
+ self.db.close()
+ os.remove(self.zodb_db_location)
+ os.remove(self.zodb_db_location + ".lock")
+ os.remove(self.zodb_db_location + ".index")
+ os.remove(self.zodb_db_location + ".tmp")
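
With parsing moved into `__init__` and cleanup into `__exit__`, `JSONParser` is now used as a context manager. A hypothetical usage sketch (the file name is illustrative):

```python
# Hypothetical usage of the reworked JSONParser: parsing happens on
# construction, and the `with` block guarantees the temporary ZODB is removed.
from flattentool.json_input import JSONParser

with JSONParser(json_filename="input.json", root_list_path="main", persist=True) as parser:
    for line in parser.main_sheet.lines:  # `lines` is now a lazy generator
        print(line)
```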
diff --git a/flattentool/output.py b/flattentool/output.py
index b92b0d02..947ceac6 100644
--- a/flattentool/output.py
+++ b/flattentool/output.py
@@ -50,7 +50,7 @@ def close(self):
class XLSXOutput(SpreadsheetOutput):
def open(self):
- self.workbook = openpyxl.Workbook()
+ self.workbook = openpyxl.Workbook(write_only=True)
def write_sheet(self, sheet_name, sheet):
sheet_header = list(sheet)
@@ -75,7 +75,6 @@ def write_sheet(self, sheet_name, sheet):
worksheet.append(line)
def close(self):
- self.workbook.remove(self.workbook.active)
self.workbook.save(self.output_name)
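
The two changes above go together: a write-only workbook streams rows to disk and, unlike a regular workbook, is created with no default active sheet, so `close()` no longer has a blank sheet to remove. A minimal demonstration:

```python
# Minimal demonstration of openpyxl's write-only mode, as used above.
import openpyxl

wb = openpyxl.Workbook(write_only=True)  # no default sheet is created
ws = wb.create_sheet("release")
ws.append(["heading1", "heading2"])      # write-only sheets are append-only
ws.append(["a", "b"])
wb.save("release.xlsx")                  # rows are streamed, not held in memory
```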
diff --git a/flattentool/sheet.py b/flattentool/sheet.py
index 05f2159a..df6b99be 100644
--- a/flattentool/sheet.py
+++ b/flattentool/sheet.py
@@ -1,3 +1,8 @@
+import copy
+
+import BTrees.IOBTree
+
+
class Sheet(object):
"""
An abstract representation of a single sheet of a spreadsheet.
@@ -8,10 +13,14 @@ def __init__(self, columns=None, root_id="", name=None):
self.id_columns = []
self.columns = columns if columns else []
self.titles = {}
- self.lines = []
+ self._lines = []
self.root_id = root_id
self.name = name
+ @property
+ def lines(self):
+ return self._lines
+
def add_field(self, field, id_field=False):
columns = self.id_columns if id_field else self.columns
if field not in columns:
@@ -27,3 +36,39 @@ def __iter__(self):
yield column
for column in self.columns:
yield column
+
+ def append_line(self, flattened_dict):
+ self._lines.append(flattened_dict)
+
+
+class PersistentSheet(Sheet):
+ """
+ A sheet that is persisted in ZODB database.
+
+ """
+
+ def __init__(self, columns=None, root_id="", name=None, connection=None):
+ super().__init__(columns=columns, root_id=root_id, name=name)
+ self.connection = connection
+ self.index = 0
+ connection.root.sheet_store[self.name] = BTrees.IOBTree.BTree()
+
+ @property
+ def lines(self):
+ for key, value in self.connection.root.sheet_store[self.name].items():
+ if key % 5000 == 0:
+ self.connection.cacheMinimize()
+ yield value
+
+ def append_line(self, flattened_dict):
+ self.connection.root.sheet_store[self.name][self.index] = flattened_dict
+ self.index += 1
+
+ @classmethod
+ def from_sheet(cls, sheet, connection):
+ instance = cls(name=sheet.name, connection=connection)
+ instance.id_columns = copy.deepcopy(sheet.id_columns)
+ instance.columns = copy.deepcopy(sheet.columns)
+ instance.titles = copy.deepcopy(sheet.titles)
+ instance.root_id = sheet.root_id
+ return instance
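
A short illustrative sketch of how `PersistentSheet` behaves (the setup here is hypothetical; in practice the connection comes from `JSONParser`): lines live in a per-sheet BTree and are read back lazily through the `lines` generator:

```python
# Illustrative sketch: PersistentSheet stores lines in a per-sheet BTree and
# reads them back lazily, trimming the ZODB cache every 5000 rows.
import BTrees.OOBTree
import ZODB

from flattentool.sheet import PersistentSheet

connection = ZODB.DB(None).open()  # None -> in-memory storage
connection.root.sheet_store = BTrees.OOBTree.BTree()

sheet = PersistentSheet(name="main", connection=connection)
sheet.append_line({"a": "b"})
print(list(sheet.lines))  # -> [{'a': 'b'}]
```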
diff --git a/flattentool/tests/fixtures/iati-org.xml b/flattentool/tests/fixtures/iati-org.xml
index 5b181d0e..2ff00249 100644
--- a/flattentool/tests/fixtures/iati-org.xml
+++ b/flattentool/tests/fixtures/iati-org.xml
@@ -1,8 +1,7 @@
-
-
-
- AA-AAA-123456789
-
-
-
+
+
+ AA-AAA-123456789
+
+
+
\ No newline at end of file
diff --git a/flattentool/tests/test_docs.py b/flattentool/tests/test_docs.py
index 86d156a7..a98c4191 100644
--- a/flattentool/tests/test_docs.py
+++ b/flattentool/tests/test_docs.py
@@ -151,7 +151,7 @@ def _simplify_line(line):
def _strip(output):
# Don't worry about any extra blank lines at the end either
outstr = str(output, "utf8").rstrip("\n")
- return "\n".join(line.rstrip(" ") for line in outstr.split("\n"))
+ return "\n".join(line.strip() for line in outstr.split("\n"))
# Useful for a coverage check - see developer docs for how to run the check
diff --git a/flattentool/tests/test_init.py b/flattentool/tests/test_init.py
index 511404db..124f12eb 100644
--- a/flattentool/tests/test_init.py
+++ b/flattentool/tests/test_init.py
@@ -660,10 +660,8 @@ def test_unflatten(tmpdir):
]
]
}"""
- assert lines_strip_whitespace(
- tmpdir.join("cell_source_map.json").read()
- ) == lines_strip_whitespace(expected)
data = json.loads(expected)
+ assert json.loads(tmpdir.join("cell_source_map.json").read()) == data
cells, rows = original_cell_and_row_locations(data)
# Make sure every cell in the original appeared in the cell source map exactly once
assert cells == [
@@ -855,10 +853,8 @@ def test_unflatten(tmpdir):
]
]
}"""
- assert lines_strip_whitespace(
- tmpdir.join("heading_source_map.json").read()
- ) == lines_strip_whitespace(expected_headings)
heading_data = json.loads(expected_headings)
+ assert json.loads(tmpdir.join("heading_source_map.json").read()) == heading_data
headings = original_headings(heading_data)
# Make sure every heading in the original appeared in the heading source map exactly once
assert headings == [
@@ -997,7 +993,9 @@ def test_unflatten_empty(tmpdir):
tmpdir.join("release.json").read()
) == lines_strip_whitespace(
"""{
- "main": []
+ "main": [
+
+ ]
}"""
)
diff --git a/flattentool/tests/test_json_input.py b/flattentool/tests/test_json_input.py
index 738d36bd..35357863 100644
--- a/flattentool/tests/test_json_input.py
+++ b/flattentool/tests/test_json_input.py
@@ -59,30 +59,29 @@ def test_jsonparser_arguments_exceptions(tmpdir):
def test_json_filename(tmpdir):
test_json = tmpdir.join("test.json")
- test_json.write('{"a":"b"}')
+ test_json.write('[{"a":"b"}]')
parser = JSONParser(json_filename=test_json.strpath)
- assert parser.root_json_dict == {"a": "b"}
+ assert list(parser.main_sheet.lines) == [{"a": "b"}]
def test_json_filename_utf8(tmpdir):
test_json = tmpdir.join("test.json")
- test_json.write_text('{"a":"éαГ😼𝒞人"}', encoding="utf-8")
+ test_json.write_text('[{"a":"éαГ😼𝒞人"}]', encoding="utf-8")
parser = JSONParser(json_filename=test_json.strpath)
- assert parser.root_json_dict == {"a": "éαГ😼𝒞人"}
+ assert list(parser.main_sheet.lines) == [{"a": "éαГ😼𝒞人"}]
def test_json_filename_ordered(tmpdir):
test_json = tmpdir.join("test.json")
- test_json.write('{"a":"b", "c": "d"}')
+ test_json.write('[{"a":"b", "c": "d"}]')
parser = JSONParser(json_filename=test_json.strpath)
- assert list(parser.root_json_dict.items()) == [("a", "b"), ("c", "d")]
+ assert list(parser.main_sheet.lines) == [{"a": "b", "c": "d"}]
def test_parse_empty_json_dict():
parser = JSONParser(root_json_dict={})
- parser.parse()
assert list(parser.main_sheet) == []
- assert parser.main_sheet.lines == []
+ assert list(parser.main_sheet.lines) == []
assert parser.sub_sheets == {}
@@ -93,9 +92,8 @@ def test_parse_basic_json_dict():
OrderedDict([("a", "e"), ("c", "f"),]),
]
)
- parser.parse()
assert list(parser.main_sheet) == ["a", "c"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{"a": "b", "c": "d"},
{"a": "e", "c": "f"},
]
@@ -106,9 +104,8 @@ def test_parse_nested_dict_json_dict():
parser = JSONParser(
root_json_dict=[OrderedDict([("a", "b"), ("c", OrderedDict([("d", "e")])),])]
)
- parser.parse()
assert list(parser.main_sheet) == ["a", "c/d"]
- assert parser.main_sheet.lines == [{"a": "b", "c/d": "e"}]
+ assert list(parser.main_sheet.lines) == [{"a": "b", "c/d": "e"}]
assert parser.sub_sheets == {}
@@ -116,9 +113,8 @@ def test_parse_nested_list_json_dict():
parser = JSONParser(
root_json_dict=[OrderedDict([("a", "b"), ("c", [OrderedDict([("d", "e")])]),])]
)
- parser.parse()
assert list(parser.main_sheet) == ["a"]
- assert parser.main_sheet.lines == [{"a": "b"}]
+ assert list(parser.main_sheet.lines) == [{"a": "b"}]
listify(parser.sub_sheets) == {"c": ["d"]}
parser.sub_sheets["c"].lines == [{"d": "e"}]
@@ -127,9 +123,8 @@ def test_parse_array():
parser = JSONParser(
root_json_dict=[OrderedDict([("testarray", ["item", "anotheritem", 42])])]
)
- parser.parse()
assert list(parser.main_sheet) == ["testarray"]
- assert parser.main_sheet.lines == [{"testarray": "item;anotheritem;42"}]
+ assert list(parser.main_sheet.lines) == [{"testarray": "item;anotheritem;42"}]
assert parser.sub_sheets == {}
@@ -138,9 +133,8 @@ def test_root_list_path():
root_json_dict={"custom_key": [OrderedDict([("a", "b"), ("c", "d"),])]},
root_list_path="custom_key",
)
- parser.parse()
assert list(parser.main_sheet) == ["a", "c"]
- assert parser.main_sheet.lines == [{"a": "b", "c": "d"}]
+ assert list(parser.main_sheet.lines) == [{"a": "b", "c": "d"}]
assert parser.sub_sheets == {}
@@ -169,11 +163,12 @@ def test_parse_ids(self):
],
root_id="ocid",
)
- parser.parse()
assert list(parser.main_sheet) == ["ocid", "id", "a", "f/g"]
- assert parser.main_sheet.lines == [{"ocid": 1, "id": 2, "a": "b", "f/g": "h"}]
+ assert list(parser.main_sheet.lines) == [
+ {"ocid": 1, "id": 2, "a": "b", "f/g": "h"}
+ ]
listify(parser.sub_sheets) == {"c": ["ocid", "id", "c/0/id", "c/0/d"]}
- assert parser.sub_sheets["c"].lines == [
+ assert list(parser.sub_sheets["c"].lines) == [
{"ocid": 1, "id": 2, "c/0/id": 3, "c/0/d": "e"},
{"ocid": 1, "id": 2, "c/0/id": 3, "c/0/d": "e2"},
]
@@ -212,9 +207,8 @@ def test_parse_ids_subsheet(self):
],
root_id="ocid",
)
- parser.parse()
assert list(parser.main_sheet) == ["ocid", "id"]
- assert parser.main_sheet.lines == [{"ocid": 1, "id": 2,}]
+ assert list(parser.main_sheet.lines) == [{"ocid": 1, "id": 2,}]
assert listify(parser.sub_sheets) == {
"testnest": [
"ocid",
@@ -225,7 +219,7 @@ def test_parse_ids_subsheet(self):
],
"tes_c": ["ocid", "id", "testnest/0/id", "testnest/0/c/0/d"],
}
- assert parser.sub_sheets["testnest"].lines == [
+ assert list(parser.sub_sheets["testnest"].lines) == [
{
"ocid": 1,
"id": 2,
@@ -234,7 +228,7 @@ def test_parse_ids_subsheet(self):
"testnest/0/f/g": "h",
},
]
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"ocid": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"},
{"ocid": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"},
]
@@ -271,15 +265,14 @@ def test_parse_ids_nested(self):
],
root_id="ocid",
)
- parser.parse()
assert list(parser.main_sheet) == ["ocid", "id", "a", "testnest/id", "f/g"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{"ocid": 1, "id": 2, "a": "b", "testnest/id": 3, "f/g": "h"}
]
assert listify(parser.sub_sheets) == {
"tes_c": ["ocid", "id", "testnest/id", "testnest/c/0/d"]
}
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"ocid": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e"},
{"ocid": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"},
]
@@ -326,9 +319,8 @@ def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
schema_parser=schema_parser,
remove_empty_schema_columns=remove_empty_schema_columns,
)
- parser.parse()
assert list(parser.main_sheet) == ["a"]
- assert parser.main_sheet.lines == [{"a": "b"}]
+ assert list(parser.main_sheet.lines) == [{"a": "b"}]
assert len(parser.sub_sheets) == 2 if not remove_empty_schema_columns else 1
if not remove_empty_schema_columns:
assert list(parser.sub_sheets["c"]) == list(["ocid", "c/0/d", "c/0/f"])
@@ -352,11 +344,10 @@ def test_column_matching(self, tmpdir):
schema_parser = SchemaParser(schema_filename=test_schema.strpath)
schema_parser.parse()
parser = JSONParser(
- root_json_dict=[OrderedDict([("c", ["d"]),])], schema_parser=schema_parser
+ root_json_dict=[OrderedDict([("c", ["d"]),])], schema_parser=schema_parser,
)
- parser.parse()
assert list(parser.main_sheet) == ["c"]
- assert parser.main_sheet.lines == [{"c": "d"}]
+ assert list(parser.main_sheet.lines) == [{"c": "d"}]
assert len(parser.sub_sheets) == 0
def test_rollup(self):
@@ -390,9 +381,8 @@ def test_rollup(self):
root_id="ocid",
rollup=True,
)
- parser.parse()
assert list(parser.main_sheet) == ["testA/0/testB"]
- assert parser.main_sheet.lines == [{"testA/0/testB": "1"}]
+ assert list(parser.main_sheet.lines) == [{"testA/0/testB": "1"}]
assert len(parser.sub_sheets) == 1
assert set(parser.sub_sheets["testA"]) == set(
["ocid", "testA/0/testB", "testA/0/testC"]
@@ -438,9 +428,8 @@ def test_rollup_multiple_values(self, recwarn):
schema_parser=schema_parser,
rollup=True,
)
- parser.parse()
assert list(parser.main_sheet) == ["testA/0/testB"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{
"testA/0/testB": "WARNING: More than one value supplied, consult the relevant sub-sheet for the data."
}
@@ -502,7 +491,6 @@ def test_two_parents(self):
],
schema_parser=schema_parser,
)
- parser.parse()
assert set(parser.main_sheet) == set()
assert set(parser.sub_sheets) == set(
["Atest", "Dtest", "Ate_Btest", "Dte_Btest"]
@@ -547,11 +535,12 @@ def test_parse_ids(self):
],
root_id="custom",
)
- parser.parse()
assert list(parser.main_sheet) == ["custom", "id", "a", "f/g"]
- assert parser.main_sheet.lines == [{"custom": 1, "id": 2, "a": "b", "f/g": "h"}]
+ assert list(parser.main_sheet.lines) == [
+ {"custom": 1, "id": 2, "a": "b", "f/g": "h"}
+ ]
assert listify(parser.sub_sheets) == {"c": ["custom", "id", "c/0/id", "c/0/d"]}
- assert parser.sub_sheets["c"].lines == [
+ assert list(parser.sub_sheets["c"].lines) == [
{"custom": 1, "id": 2, "c/0/id": 3, "c/0/d": "e"},
{"custom": 1, "id": 2, "c/0/id": 3, "c/0/d": "e2"},
]
@@ -590,9 +579,8 @@ def test_parse_ids_subsheet(self):
],
root_id="custom",
)
- parser.parse()
assert list(parser.main_sheet) == ["custom", "id"]
- assert parser.main_sheet.lines == [{"custom": 1, "id": 2,}]
+ assert list(parser.main_sheet.lines) == [{"custom": 1, "id": 2,}]
assert listify(parser.sub_sheets) == {
"testnest": [
"custom",
@@ -603,7 +591,7 @@ def test_parse_ids_subsheet(self):
],
"tes_c": ["custom", "id", "testnest/0/id", "testnest/0/c/0/d"],
}
- assert parser.sub_sheets["testnest"].lines == [
+ assert list(parser.sub_sheets["testnest"].lines) == [
{
"custom": 1,
"id": 2,
@@ -612,7 +600,7 @@ def test_parse_ids_subsheet(self):
"testnest/0/f/g": "h",
},
]
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"custom": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"},
{"custom": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"},
]
@@ -649,15 +637,14 @@ def test_parse_ids_nested(self):
],
root_id="custom",
)
- parser.parse()
assert list(parser.main_sheet) == ["custom", "id", "a", "testnest/id", "f/g"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{"custom": 1, "id": 2, "a": "b", "testnest/id": 3, "f/g": "h"}
]
assert listify(parser.sub_sheets) == {
"tes_c": ["custom", "id", "testnest/id", "testnest/c/0/d"]
}
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"custom": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e"},
{"custom": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"},
]
@@ -687,11 +674,10 @@ def test_parse_ids(self):
],
root_id="",
)
- parser.parse()
assert list(parser.main_sheet) == ["id", "a", "f/g"]
- assert parser.main_sheet.lines == [{"id": 2, "a": "b", "f/g": "h"}]
+ assert list(parser.main_sheet.lines) == [{"id": 2, "a": "b", "f/g": "h"}]
assert listify(parser.sub_sheets) == {"c": ["id", "c/0/id", "c/0/d"]}
- assert parser.sub_sheets["c"].lines == [
+ assert list(parser.sub_sheets["c"].lines) == [
{"id": 2, "c/0/id": 3, "c/0/d": "e"},
{"id": 2, "c/0/id": 3, "c/0/d": "e2"},
]
@@ -729,17 +715,16 @@ def test_parse_ids_subsheet(self):
],
root_id="",
)
- parser.parse()
assert list(parser.main_sheet) == ["id"]
- assert parser.main_sheet.lines == [{"id": 2,}]
+ assert list(parser.main_sheet.lines) == [{"id": 2,}]
assert listify(parser.sub_sheets) == {
"testnest": ["id", "testnest/0/id", "testnest/0/a", "testnest/0/f/g"],
"tes_c": ["id", "testnest/0/id", "testnest/0/c/0/d"],
}
- assert parser.sub_sheets["testnest"].lines == [
+ assert list(parser.sub_sheets["testnest"].lines) == [
{"id": 2, "testnest/0/id": 3, "testnest/0/a": "b", "testnest/0/f/g": "h",},
]
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"},
{"id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"},
]
@@ -775,15 +760,14 @@ def test_parse_ids_nested(self):
],
root_id="",
)
- parser.parse()
assert list(parser.main_sheet) == ["id", "a", "testnest/id", "f/g"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{"id": 2, "a": "b", "testnest/id": 3, "f/g": "h"}
]
assert listify(parser.sub_sheets) == {
"tes_c": ["id", "testnest/id", "testnest/c/0/d"]
}
- assert parser.sub_sheets["tes_c"].lines == [
+ assert list(parser.sub_sheets["tes_c"].lines) == [
{"id": 2, "testnest/id": 3, "testnest/c/0/d": "e"},
{"id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"},
]
diff --git a/flattentool/tests/test_json_input_is_unflatten_reversed.py b/flattentool/tests/test_json_input_is_unflatten_reversed.py
index cdd6a9a5..3007e2e2 100644
--- a/flattentool/tests/test_json_input_is_unflatten_reversed.py
+++ b/flattentool/tests/test_json_input_is_unflatten_reversed.py
@@ -80,7 +80,6 @@ def test_flatten(
schema_parser=schema_parser,
**extra_kwargs
)
- parser.parse()
expected_output_list = [
inject_root_id(root_id, expected_output_dict)
@@ -188,7 +187,6 @@ def test_flatten_multiplesheets(
schema_parser=schema_parser,
**extra_kwargs
)
- parser.parse()
expected_output_dict = OrderedDict(
[
@@ -197,11 +195,11 @@ def test_flatten_multiplesheets(
]
)
output = {
- sheet_name: sheet.lines
+ sheet_name: list(sheet.lines)
for sheet_name, sheet in parser.sub_sheets.items()
- if sheet.lines
+ if list(sheet.lines)
}
- output["custom_main"] = parser.main_sheet.lines
+ output["custom_main"] = list(parser.main_sheet.lines)
assert output == expected_output_dict
diff --git a/flattentool/tests/test_output.py b/flattentool/tests/test_output.py
index 023ce09b..ea47407b 100644
--- a/flattentool/tests/test_output.py
+++ b/flattentool/tests/test_output.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import os
+import sys
import openpyxl
import pytest
@@ -41,7 +42,10 @@ def test_blank_sheets(tmpdir):
wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath)
assert wb.sheetnames == ["release"]
rows = list(wb["release"].rows)
- assert len(rows) == 0
+ # openpyxl fixed this bug, but earlier versions of Python are stuck with it.
+ # Remove this workaround when we no longer support Python 3.5.
+ if sys.version_info >= (3, 6, 0):
+ assert len(rows) == 0
# Check CSV is Empty
assert tmpdir.join("release").listdir() == [
@@ -102,7 +106,7 @@ def test_empty_lines(tmpdir):
subsheet = Sheet(root_id="ocid")
subsheet.add_field("c")
parser = MockParser(["a", "d"], {"b": subsheet})
- parser.main_sheet.lines = []
+ parser.main_sheet._lines = []
for format_name, spreadsheet_output_class in output.FORMATS.items():
spreadsheet_output = spreadsheet_output_class(
parser=parser,
@@ -147,8 +151,8 @@ def test_populated_lines(tmpdir):
subsheet = Sheet(root_id="ocid")
subsheet.add_field("c")
parser = MockParser(["a"], {})
- parser.main_sheet.lines = [{"a": "cell1"}, {"a": "cell2"}]
- subsheet.lines = [{"c": "cell3"}, {"c": "cell4"}]
+ parser.main_sheet._lines = [{"a": "cell1"}, {"a": "cell2"}]
+ subsheet._lines = [{"c": "cell3"}, {"c": "cell4"}]
parser.sub_sheets["b"] = subsheet
for format_name, spreadsheet_output_class in output.FORMATS.items():
spreadsheet_output = spreadsheet_output_class(
@@ -206,7 +210,7 @@ def test_populated_lines(tmpdir):
def test_utf8(tmpdir):
parser = MockParser(["é"], {})
- parser.main_sheet.lines = [{"é": "éαГ😼𝒞人"}, {"é": "cell2"}]
+ parser.main_sheet._lines = [{"é": "éαГ😼𝒞人"}, {"é": "cell2"}]
for format_name, spreadsheet_output_class in output.FORMATS.items():
spreadsheet_output = spreadsheet_output_class(
parser=parser,
diff --git a/flattentool/tests/test_xml_input.py b/flattentool/tests/test_xml_input.py
index 4ab90784..d0539749 100644
--- a/flattentool/tests/test_xml_input.py
+++ b/flattentool/tests/test_xml_input.py
@@ -15,9 +15,8 @@ def test_xml_empty():
xml=True,
id_name="iati-identifier",
)
- parser.parse()
assert list(parser.main_sheet) == []
- assert parser.main_sheet.lines == []
+ assert list(parser.main_sheet.lines) == []
assert parser.sub_sheets == {}
@@ -30,7 +29,6 @@ def test_xml_basic_example():
xml=True,
id_name="iati-identifier",
)
- parser.parse()
assert list(parser.main_sheet) == [
"iati-identifier",
"reporting-org/@ref",
@@ -44,7 +42,7 @@ def test_xml_basic_example():
"activity-date/@iso-date",
"activity-date/@type",
]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{
"activity-date/@type": "1",
"reporting-org/narrative": "Organisation name",
@@ -80,7 +78,7 @@ def test_xml_basic_example():
"transaction/0/value/@value-date",
"transaction/0/value",
]
- assert parser.sub_sheets["transaction"].lines == [
+ assert list(parser.sub_sheets["transaction"].lines) == [
{
"transaction/0/value/@value-date": "2012-01-01",
"iati-identifier": "AA-AAA-123456789-ABC123",
@@ -115,7 +113,7 @@ def test_xml_basic_example():
"recipient-country/0/@code",
"recipient-country/0/@percentage",
]
- assert parser.sub_sheets["recipient-country"].lines == [
+ assert list(parser.sub_sheets["recipient-country"].lines) == [
{
"iati-identifier": "AA-AAA-123456789-ABC123",
"recipient-country/0/@code": "AF",
@@ -148,9 +146,8 @@ def test_varyin_transaction_count():
xml=True,
id_name="iati-identifier",
)
- parser.parse()
assert list(parser.main_sheet) == ["iati-identifier"]
- assert parser.main_sheet.lines == [
+ assert list(parser.main_sheet.lines) == [
{"iati-identifier": "AA-AAA-123456789-ABC123"},
{"iati-identifier": "AA-AAA-123456789-ABC124"},
{"iati-identifier": "AA-AAA-123456789-ABC125"},
@@ -162,7 +159,7 @@ def test_varyin_transaction_count():
"transaction/0/value/@value-date",
"transaction/0/value",
]
- assert parser.sub_sheets["transaction"].lines == [
+ assert list(parser.sub_sheets["transaction"].lines) == [
{
"iati-identifier": "AA-AAA-123456789-ABC123",
"transaction/0/value/@value-date": "2012-01-01",
@@ -251,16 +248,15 @@ def test_list_dict_consistency():
def test_xml_whitespace():
- parser = JSONParser(
- json_filename="flattentool/tests/fixtures/narrative_whitespace.xml",
- root_list_path="iati-activity",
- schema_parser=None,
- root_id="",
- xml=True,
- id_name="iati-identifier",
- )
-
try:
- parser.parse()
+ parser = JSONParser(
+ json_filename="flattentool/tests/fixtures/narrative_whitespace.xml",
+ root_list_path="iati-activity",
+ schema_parser=None,
+ root_id="",
+ xml=True,
+ id_name="iati-identifier",
+ )
+ assert parser
except TypeError as e:
raise e
diff --git a/flattentool/xml_output.py b/flattentool/xml_output.py
index a689d14c..5ed788ca 100644
--- a/flattentool/xml_output.py
+++ b/flattentool/xml_output.py
@@ -111,3 +111,30 @@ def toxml(
)
else:
return ET.tostring(root)
+
+
+def write_comment(xml_stream, xml_comment):
+ if xml_comment is None:
+ xml_comment = "XML generated by flatten-tool"
+ if xml_comment:
+ xml_stream.write(ET.Comment(xml_comment), pretty_print=True)
+
+
+def generate_schema_dict(xml_schemas, root_list_path):
+ return XMLSchemaWalker(xml_schemas).create_schema_dict(root_list_path)
+
+
+def xml_item(xml_stream, data, root_list_path="iati-activity", schema_dict=None):
+ nsmap = {
+ # This is "bound by definition" - see https://www.w3.org/XML/1998/namespace
+ "xml": "http://www.w3.org/XML/1998/namespace"
+ }
+
+ root = dict_to_xml(data, root_list_path, nsmap=nsmap)
+ if schema_dict:
+ sort_element(root, schema_dict)
+
+ xml_stream.write(root, pretty_print=True)
+ root = None
+
+ return schema_dict
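
These helpers plug into lxml's incremental writer, which streams each item out as soon as it is produced instead of building one big tree. A sketch of the surrounding pattern with hypothetical element content, using the same calls as `unflatten`:

```python
# Sketch of the incremental XML writing these helpers target: lxml's
# xmlfile context manager streams elements to the output as written.
import lxml.etree as ET

with open("out.xml", "wb") as f, ET.xmlfile(f, encoding="utf-8") as xml_stream:
    xml_stream.write_declaration()
    with xml_stream.element("iati-activities"):
        xml_stream.write(ET.Comment("XML generated by flatten-tool"), pretty_print=True)
        for identifier in ("AA-1", "AA-2"):  # hypothetical identifiers
            activity = ET.Element("iati-activity")
            ET.SubElement(activity, "iati-identifier").text = identifier
            xml_stream.write(activity, pretty_print=True)
```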
diff --git a/setup.py b/setup.py
index 6379e337..24c37097 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,10 @@ def run(self):
"xmltodict",
"lxml",
"odfpy",
+ "zodb",
+ "zc.zlibstorage",
+ "ijson",
+ "jsonstreams",
]
setup(