diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5af28bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pkl +*.jsonl diff --git a/README.md b/README.md index 6ebd8bc..601c82d 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Change into the `nwbib` directory: cd nwbib -Load NWBib data from the Lobid API: +Load sample NWBib data from the Lobid API: python3 nwbib_subjects_load.py @@ -26,6 +26,10 @@ Run classification experiment: python3 nwbib_subjects_process.py +Run bulk classification (first run takes some time): + + python3 nwbib_subjects_bulk.py + ## License [Eclipse Public License 2.0](http://www.eclipse.org/legal/epl-v20.html) \ No newline at end of file diff --git a/nwbib/nwbib-subjects-bulk-predict.csv b/nwbib/nwbib-subjects-bulk-predict.csv new file mode 100644 index 0000000..fead71c --- /dev/null +++ b/nwbib/nwbib-subjects-bulk-predict.csv @@ -0,0 +1,299 @@ +hbzId,subject +HT019197733,s733030 +HT012856855,s503430 +BT000003431,s841070 +HT016712778,s768030 +BT000004673,s841070 +HT017700139,s843090 +BT000004675,s783000 +BT000004695,s767040 +BT000004706,s768010 +HT016444185,s799200 +BT000004925,s582050 +HT017683803,s555020 +HT019178040,s844030 +BT000002444,s797010 +BT000003102,s841070 +BT000003964,s797010 +BT000004262,s841070 +BT000004268,s240000 +BT000004863,s841070 +BT000006046,s797010 +HT016708916,s425020 +BT000004974,s611050 +BT000004986,s768010 +BT000004989,s240000 +BT000005009,s841070 +BT000005048,s841070 +BT000005057,s406130 +BT000002448,s768030 +HT017404363,s841070 +BT000002480,s797010 +BT000003147,s827000 +BT000003528,s841070 +BT000003980,s736030 +BT000004555,s824040 +HT016713438,s744000 +BT000004571,s768010 +BT000004575,s768010 +BT000004580,s841070 +BT000004737,s824040 +HT016708934,s425000 +BT000006049,s797010 +BT000005076,s102000 +BT000005099,s768030 +HT015605337,s228000 +BT000002515,s794010 +BT000002517,s732000 +BT000003180,s804000 +BT000004298,s555000 +BT000004577,s768010 +HT002142428,s841070 +HT017049016,s613050 +HT017049114,s768010 +HT018940690,s109000 +BT000002537,s798200 +BT000003187,s163050 +BT000003780,s554000 +HT014090049,s557000 +BT000004597,s102000 +BT000004605,s844030 +BT000004608,s841070 +BT000004780,s841070 +HT019135849,s220500 +BT000005162,s102000 +BT000005191,s102000 +BT000005203,s768030 +BT000005228,s228000 +BT000002567,s613050 +HT017390012,s240000 +BT000002590,s240000 +BT000004001,s841070 +BT000004024,s611090 +BT000004025,s611090 +BT000004658,s768010 +BT000004655,s768010 +BT000004721,s768010 +BT000006213,s768010 +BT000006219,s844030 +HT017049047,s841070 +BT000006389,s768030 +BT000006583,s841070 +HT009125364,s841070 +HT017012857,s102000 +BT000002759,s797010 +BT000002752,s543800 +BT000003244,s843042 +BT000004333,s126000 +BT000004345,s240000 +BT000004684,s768010 +BT000006489,s442000 +BT000005167,s768010 +BT000005171,s747010 +BT000006587,s882024 +BT000006590,s802060 +BT000006594,s841070 +BT000006623,s225000 +BT000006625,s613090 +BT000002775,s768030 +BT000002792,s841070 +HT015287390,s222000 +HT017698791,s102000 +BT000003548,s767040 +BT000003822,s768010 +HT015287495,s611010 +HT015287558,s841040 +HT015287597,s768030 +HT015287623,s768010 +HT015287626,s613010 +HT017286817,s613010 +BT000006522,s826000 +BT000005184,s740100 +BT000002823,s543000 +BT000002826,s228000 +BT000002829,s844030 +BT000002833,s797010 +BT000003289,s768010 +BT000003293,s613050 +BT000003294,s744000 +HT018749389,s841070 +HT017049848,s796000 +BT000006413,s841070 +BT000006486,s768010 +BT000006565,s226000 +BT000005339,s768030 +HT017421266,s704063 +BT000003305,s768030 
+HT017015640,s797010 +HT017049481,s788000 +BT000006595,s823000 +BT000005369,s163080 +HT013987261,s544610 +HT017012889,s240000 +BT000005532,s841070 +BT000005800,s844050 +BT000005802,s846000 +BT000005803,s844030 +BT000005804,s844050 +BT000005819,s240000 +BT000003643,s217030 +BT000003852,s768010 +BT000004417,s768010 +BT000004424,s768010 +BT000005415,s102000 +HT017013602,s844200 +BT000005423,s102000 +HT017051937,s706042 +BT000003856,s768010 +BT000003874,s797010 +BT000003895,s532050 +BT000003898,s844200 +BT000004124,s841070 +BT000006574,s841070 +BT000006575,s762000 +BT000006640,s841070 +BT000002932,s613040 +BT000002952,s797010 +BT000003872,s797010 +BT000005726,s142380 +HT015287603,s844030 +HT015287607,s613050 +HT015287610,s613050 +BT000005912,s844200 +HT014135282,s142320 +BT000006004,s841070 +HT014092716,s841070 +BT000002980,s846000 +BT000003381,s768010 +BT000003382,s768010 +BT000003384,s768010 +BT000003876,s797010 +HT014129983,s613000 +BT000004467,s797010 +BT000004477,s797010 +HT013989184,s844200 +BT000006612,s109000 +BT000006653,s841040 +BT000005649,s613010 +BT000005892,s768030 +BT000006118,s841070 +HT017050088,s706090 +BT000003688,s843060 +BT000003691,s843046 +BT000003716,s406130 +BT000003720,s841070 +BT000004217,s586020 +HT014092622,s841070 +BT000005982,s768010 +BT000006110,s572050 +BT000006116,s820100 +BT000006170,s547460 +BT000003028,s768030 +BT000003031,s841070 +BT000003037,s724040 +HT013988338,s768030 +BT000003066,s611050 +BT000004224,s844030 +TT002621880,s841070 +BT000005519,s700100 +BT000005526,s768010 +BT000005699,s217000 +TT002623120,s841070 +BT000005779,s240000 +BT000005799,s844030 +HT018186727,s503260 +BT000006018,s557020 +HT017017675,s841070 +HT014132392,s217000 +HT017016033,s708060 +HT013988363,s768030 +HT012774080,s428030 +HT014134939,s768010 +HT012819603,s102000 +HT004170735,s708029 +HT013988397,s768030 +HT007382423,s844030 +HT017685972,s788000 +HT012773930,s240000 +HT015612888,s768010 +HT013989541,s102070 +HT019130151,s220500 +HT019036954,s205040 +HT007599850,s142380 +HT013989390,s543800 +HT012823761,s841070 +HT014131829,s841070 +HT019178300,s226030 +HT013989670,s841070 +HT017424893,s572050 +HT017525587,s102000 +HT013989567,s220500 +HT013990956,s768010 +HT014130082,s228000 +HT012821242,s240000 +HT018992763,s843000 +HT014715542,s225000 +HT017697493,s844200 +HT003470621,s240000 +HT012776653,s744000 +HT018933209,s700100 +HT007958739,s841070 +HT018286871,s768010 +HT017306644,s217000 +HT016001979,s611060 +HT014904955,s768010 +HT013150397,s841070 +HT019025915,s240000 +HT019493573,s228000 +HT016002174,s611060 +HT018791004,s225000 +CT002011301,s797010 +HT016997682,s800100 +HT016002021,s611060 +HT016995047,s102000 +HT018189291,s735020 +HT018942954,s841060 +HT019105829,s102000 +HT016006770,s844040 +TT002926919,s841070 +HT013151844,s425020 +HT013554686,s226030 +HT018788766,s562000 +HT014404763,s841070 +HT016456752,s768030 +HT014903161,s768010 +HT013152404,s841070 +HT016009664,s613043 +HT015381800,s213000 +HT019354900,s844200 +HT017054850,s768010 +HT014402043,s613010 +HT010848782,s768010 +HT013552435,s841070 +HT014992451,s224060 +HT014992452,s225000 +HT014992454,s221000 +HT018584327,s841070 +HT016141485,s841070 +HT013652175,s841070 +HT017056757,s532030 +HT013555584,s566070 +HT013201188,s544210 +HT016001648,s844200 +HT016002115,s220500 +HT016458705,s849056 +HT018183132,s736000 +HT016310431,s841070 +HT013652497,s102060 +HT017060772,s524010 +HT018966175,s240000 +HT014995656,s425020 +HT015383632,s843060 +HT014995854,s841070 +HT013151784,s768030 +HT019423240,s632050 +HT013554645,s262012 
+HT003295274,s557000
+HT003295284,s557020
+HT016313237,s768010
+HT011094158,s702000
diff --git a/nwbib/nwbib_subjects_bulk.py b/nwbib/nwbib_subjects_bulk.py
new file mode 100644
index 0000000..58bb17d
--- /dev/null
+++ b/nwbib/nwbib_subjects_bulk.py
@@ -0,0 +1,147 @@
+'''
+Created on Feb 16, 2018
+
+@author: fsteeg
+'''
+
+import json
+import csv
+import pickle
+import requests
+from pathlib import Path
+from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.svm import LinearSVC
+from sklearn.metrics import accuracy_score
+
+
+bulk_file = 'nwbib-subjects-bulk.jsonl'
+url = 'http://lobid.org/resources/search'
+params = {
+    'q': 'rheinland',  # for testing: use small-ish set
+    'nested': 'subject:subject.source.id:"http://purl.org/lobid/nwbib"',
+    'format': 'bulk'
+}
+saved_classifier_file = 'nwbib-subjects-classifier.pkl'
+stop_word_url = 'https://raw.githubusercontent.com/solariz/german_stopwords/master/german_stopwords_plain.txt'
+classifier = LinearSVC()
+
+output_file = 'nwbib-subjects-bulk-predict.csv'
+
+
+def main():
+
+    vectorizer = HashingVectorizer(n_features=2 ** 18, stop_words=stop_words())
+
+    create_bulk_data()
+    hbzIds, subjects, texts = load_from_jsonl(bulk_file)
+
+    # Hold out 1% of the documents for testing, train on the rest
+    test_set_size = len(subjects) // 100
+    Y_train, X_train_texts = subjects[test_set_size:], texts[test_set_size:]
+    ids_test, Y_test, X_test_texts = \
+        hbzIds[:test_set_size], subjects[:test_set_size], texts[:test_set_size]
+
+    print('{} training docs, {} testing docs'.format(len(Y_train), len(Y_test)))
+    print_info(Y_train[0], X_train_texts[0], vectorizer)
+
+    X_train = vectorizer.transform(X_train_texts)
+    X_test = vectorizer.transform(X_test_texts)
+
+    classifier = create_classifier(X_train, Y_train)
+
+    prediction, _score = predict(X_test, Y_test, classifier)
+
+    data = [(hbzId, prediction[i]) for (i, hbzId) in enumerate(ids_test)]
+    write_to_csv(output_file, data)
+
+
+def stop_words():
+    response = requests.get(url=stop_word_url)
+    return [line.strip() for line in response.text.splitlines()
+            if not line.startswith(';')]
+
+
+def create_bulk_data():
+    if not Path(bulk_file).exists():
+        print('Getting bulk data...')
+        response = requests.get(url=url, params=params)
+        with open(bulk_file, 'w') as f:
+            f.write(response.text)
+    else:
+        print('Using local bulk data in {}...'.format(bulk_file))
+
+
+def create_classifier(X_train, Y_train):
+    # Load a previously trained classifier if available, else train and save one
+    result = None
+    if Path(saved_classifier_file).exists():
+        print('Loading trained classifier...')
+        with open(saved_classifier_file, 'rb') as c:
+            result = pickle.load(c)
+    else:
+        print('Training classifier...')
+        result = classifier.fit(X_train, Y_train)
+        with open(saved_classifier_file, 'wb') as c:
+            pickle.dump(result, c)
+    return result
+
+
+def print_info(subject, text, vectorizer):
+    # See http://scikit-learn.org/stable/modules/feature_extraction.html
+    print('Using vectorizer: {}'.format(vectorizer))
+    analyzer = vectorizer.build_analyzer()
+    vector = vectorizer.transform([text])[0]
+    print('{}, {}'.format(subject, analyzer(text)))
+    print(vector)
+
+
+def predict(X_test, Y_test, classifier):
+    print('Predicting...')
+    Y_pred = classifier.predict(X_test)
+    score = accuracy_score(Y_test, Y_pred)
+    print('{:1.4f} classification accuracy for {}'.format(
+        score, classifier))
+    return Y_pred, score
+
+
+def load_from_jsonl(jsonl):
+    with open(jsonl, 'r') as f:
+        hbzIds = []
+        subjects = []
+        texts = []
+        for line in f:
+            entry = json.loads(line)
+            hbzId = entry['hbzId']
+            subject = first_nwbib_subject(entry)
+            # Build the document text from title, subtitle, and corporate body
+            title = entry.get('title', '')
+            sub = entry.get('otherTitleInformation', None)
+            corp = entry.get('corporateBodyForTitle', None)
+            vals = [title, sub[0] if sub else '', corp[0] if corp else '']
+            doc = ' '.join(vals).strip()
+            hbzIds.append(hbzId)
+            subjects.append(subject)
+            texts.append(doc)
+    return (hbzIds, subjects, texts)
+
+
+def write_to_csv(name, data):
+    with open(name, 'w', newline='') as csvfile:
+        writer = csv.writer(
+            csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(['hbzId', 'subject'])
+        for (hbzId, subject) in data:
+            writer.writerow([hbzId, subject])
+
+
+def first_nwbib_subject(entry):
+    # Return the first NWBib subject id (the fragment after '#'), or 'NULL'
+    for subject in entry.get('subject', []):
+        source = subject.get('source', None)
+        if source and source.get('id', None) == 'http://purl.org/lobid/nwbib':
+            return subject['id'].split('#')[1]
+    return 'NULL'
+
+
+if __name__ == '__main__':
+    main()
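
Note on reusing the trained model: the script pickles only the classifier, not the vectorizer. That works because `HashingVectorizer` is stateless, so constructing it again with the same parameters (`n_features=2 ** 18` and the same stop word list) reproduces the exact feature space the classifier was trained on. A minimal sketch of how the saved `nwbib-subjects-classifier.pkl` could be applied to unseen titles, assuming a prior run of `nwbib_subjects_bulk.py` has created the pickle (the input title below is hypothetical):

    import pickle
    import requests
    from sklearn.feature_extraction.text import HashingVectorizer

    # Same stop word list that was used for training
    stop_word_url = ('https://raw.githubusercontent.com/solariz/german_stopwords'
                     '/master/german_stopwords_plain.txt')
    stop_words = [line.strip()
                  for line in requests.get(url=stop_word_url).text.splitlines()
                  if not line.startswith(';')]

    # HashingVectorizer is stateless: identical parameters yield the
    # identical feature space, so no fitted vectorizer needs to be stored
    vectorizer = HashingVectorizer(n_features=2 ** 18, stop_words=stop_words)

    with open('nwbib-subjects-classifier.pkl', 'rb') as c:
        classifier = pickle.load(c)

    titles = ['Geschichte der Stadt Bonn']  # hypothetical input title
    # Prints the predicted NWBib subject id for each input text
    print(classifier.predict(vectorizer.transform(titles)))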