
Commit 9ba1d4a

fix python style
1 parent c3e96b9 commit 9ba1d4a

File tree

4 files changed (+40, -40 lines)

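All four changes are whitespace-only Python style fixes: blank lines that still carried trailing whitespace are emptied, and inline comments get the two spaces PEP 8 asks for before the "#". A minimal before/after sketch of the two checks involved (assuming the usual pycodestyle rules, W291 "trailing whitespace" and E261 "at least two spaces before inline comment"; the variable below is a stand-in for illustration only):

    gbtModel = "GBTClassificationModel"  # stand-in value, not from the examples

    # Before: a single space before the inline comment is flagged (E261), and
    # "blank" lines that still contain spaces are flagged (W291).
    # print(gbtModel) # summary only

    # After: the comment is separated from the statement by two spaces,
    # and blank lines are truly empty.
    print(gbtModel)  # summary only

In the diffs below, each removed line is a blank line whose trailing whitespace (invisible in this rendering) was stripped, except for the final comment-spacing fix in each file.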

examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py

Lines changed: 10 additions & 10 deletions

@@ -38,41 +38,41 @@
 # $example on$
 # Load and parse the data file, converting it to a DataFrame.
 data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
-
+
 # Index labels, adding metadata to the label column.
 # Fit on whole dataset to include all labels in index.
 labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
 # Automatically identify categorical features, and index them.
 # Set maxCategories so features with > 4 distinct values are treated as continuous.
 featureIndexer =\
     VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
-
+
 # Split the data into training and test sets (30% held out for testing)
 (trainingData, testData) = data.randomSplit([0.7, 0.3])
-
+
 # Train a GBT model.
 gbt = GBTClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures", maxIter=10)
-
+
 # Chain indexers and GBT in a Pipeline
 pipeline = Pipeline(stages=[labelIndexer, featureIndexer, gbt])
-
+
 # Train model. This also runs the indexers.
 model = pipeline.fit(trainingData)
-
+
 # Make predictions.
 predictions = model.transform(testData)
-
+
 # Select example rows to display.
 predictions.select("prediction", "indexedLabel", "features").show(5)
-
+
 # Select (prediction, true label) and compute test error
 evaluator = MulticlassClassificationEvaluator(
     labelCol="indexedLabel", predictionCol="prediction", metricName="precision")
 accuracy = evaluator.evaluate(predictions)
 print("Test Error = %g" % (1.0 - accuracy))
-
+
 gbtModel = model.stages[2]
-print(gbtModel) # summary only
+print(gbtModel)  # summary only
 # $example off$

 sc.stop()
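The hunk starts at line 38, so the imports and Spark setup it relies on are not shown. A rough sketch of the kind of preamble these lines assume, for orientation only (the real preamble lives in the unchanged top of the file and may differ in detail):

    from pyspark import SparkContext
    from pyspark.sql import SQLContext
    from pyspark.ml import Pipeline
    from pyspark.ml.classification import GBTClassifier
    from pyspark.ml.feature import StringIndexer, VectorIndexer
    from pyspark.ml.evaluation import MulticlassClassificationEvaluator
    from pyspark.mllib.util import MLUtils

    # A SparkContext plus an active SQLContext are needed for loadLibSVMFile(...).toDF().
    sc = SparkContext(appName="gradient_boosted_tree_classifier_example")
    sqlContext = SQLContext(sc)

Each example can typically be run from a Spark checkout with bin/spark-submit followed by the file path listed above.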

examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py

Lines changed: 10 additions & 10 deletions

@@ -38,38 +38,38 @@
 # $example on$
 # Load and parse the data file, converting it to a DataFrame.
 data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
-
+
 # Automatically identify categorical features, and index them.
 # Set maxCategories so features with > 4 distinct values are treated as continuous.
 featureIndexer =\
     VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
-
+
 # Split the data into training and test sets (30% held out for testing)
 (trainingData, testData) = data.randomSplit([0.7, 0.3])
-
+
 # Train a GBT model.
 gbt = GBTRegressor(featuresCol="indexedFeatures", maxIter=10)
-
+
 # Chain indexer and GBT in a Pipeline
 pipeline = Pipeline(stages=[featureIndexer, gbt])
-
+
 # Train model. This also runs the indexer.
 model = pipeline.fit(trainingData)
-
+
 # Make predictions.
 predictions = model.transform(testData)
-
+
 # Select example rows to display.
 predictions.select("prediction", "label", "features").show(5)
-
+
 # Select (prediction, true label) and compute test error
 evaluator = RegressionEvaluator(
     labelCol="label", predictionCol="prediction", metricName="rmse")
 rmse = evaluator.evaluate(predictions)
 print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)
-
+
 gbtModel = model.stages[1]
-print(gbtModel) # summary only
+print(gbtModel)  # summary only
 # $example off$

 sc.stop()

examples/src/main/python/ml/random_forest_classifier_example.py

Lines changed: 10 additions & 10 deletions

@@ -38,41 +38,41 @@
 # $example on$
 # Load and parse the data file, converting it to a DataFrame.
 data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
-
+
 # Index labels, adding metadata to the label column.
 # Fit on whole dataset to include all labels in index.
 labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
 # Automatically identify categorical features, and index them.
 # Set maxCategories so features with > 4 distinct values are treated as continuous.
 featureIndexer =\
     VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
-
+
 # Split the data into training and test sets (30% held out for testing)
 (trainingData, testData) = data.randomSplit([0.7, 0.3])
-
+
 # Train a RandomForest model.
 rf = RandomForestClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures")
-
+
 # Chain indexers and forest in a Pipeline
 pipeline = Pipeline(stages=[labelIndexer, featureIndexer, rf])
-
+
 # Train model. This also runs the indexers.
 model = pipeline.fit(trainingData)
-
+
 # Make predictions.
 predictions = model.transform(testData)
-
+
 # Select example rows to display.
 predictions.select("prediction", "indexedLabel", "features").show(5)
-
+
 # Select (prediction, true label) and compute test error
 evaluator = MulticlassClassificationEvaluator(
     labelCol="indexedLabel", predictionCol="prediction", metricName="precision")
 accuracy = evaluator.evaluate(predictions)
 print("Test Error = %g" % (1.0 - accuracy))
-
+
 rfModel = model.stages[2]
-print(rfModel) # summary only
+print(rfModel)  # summary only
 # $example off$

 sc.stop()

examples/src/main/python/ml/random_forest_regressor_example.py

Lines changed: 10 additions & 10 deletions

@@ -38,38 +38,38 @@
 # $example on$
 # Load and parse the data file, converting it to a DataFrame.
 data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
-
+
 # Automatically identify categorical features, and index them.
 # Set maxCategories so features with > 4 distinct values are treated as continuous.
 featureIndexer =\
     VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
-
+
 # Split the data into training and test sets (30% held out for testing)
 (trainingData, testData) = data.randomSplit([0.7, 0.3])
-
+
 # Train a RandomForest model.
 rf = RandomForestRegressor(featuresCol="indexedFeatures")
-
+
 # Chain indexer and forest in a Pipeline
 pipeline = Pipeline(stages=[featureIndexer, rf])
-
+
 # Train model. This also runs the indexer.
 model = pipeline.fit(trainingData)
-
+
 # Make predictions.
 predictions = model.transform(testData)
-
+
 # Select example rows to display.
 predictions.select("prediction", "label", "features").show(5)
-
+
 # Select (prediction, true label) and compute test error
 evaluator = RegressionEvaluator(
     labelCol="label", predictionCol="prediction", metricName="rmse")
 rmse = evaluator.evaluate(predictions)
 print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)
-
+
 rfModel = model.stages[1]
-print(rfModel) # summary only
+print(rfModel)  # summary only
 # $example off$

 sc.stop()
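The three sibling examples differ from the classifier setup sketched earlier mainly in which estimator and evaluator they import; a hedged summary of the import lines they rely on (assuming the standard pyspark.ml module layout):

    from pyspark.ml.classification import RandomForestClassifier  # random_forest_classifier_example.py
    from pyspark.ml.regression import GBTRegressor                # gradient_boosted_tree_regressor_example.py
    from pyspark.ml.regression import RandomForestRegressor       # random_forest_regressor_example.py
    from pyspark.ml.evaluation import RegressionEvaluator         # both regression examples

The two regression examples also skip the StringIndexer stage, since they evaluate against the raw "label" column rather than "indexedLabel".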
