diff --git a/docs/mllib-frequent-pattern-mining.md b/docs/mllib-frequent-pattern-mining.md index 93e3f0b2d226..16d03c9d3059 100644 --- a/docs/mllib-frequent-pattern-mining.md +++ b/docs/mllib-frequent-pattern-mining.md @@ -177,6 +177,20 @@ Refer to the [`PrefixSpan` Java docs](api/java/org/apache/spark/mllib/fpm/Prefix {% include_example java/org/apache/spark/examples/mllib/JavaPrefixSpanExample.java %} + + +
+ +[`PrefixSpan`](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpan) implements the +PrefixSpan algorithm. +Calling `PrefixSpan.train` returns a +[`PrefixSpanModel`](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpanModel) +that stores the frequent sequences with their frequencies. + +Refer to the [`PrefixSpan` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpan) and [`PrefixSpanModel` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpanModel) for details on the API. + +{% include_example python/mllib/prefix_span_example.py %} +
# Contents of the new file added by this patch:
# examples/src/main/python/mllib/prefix_span_example.py (mode 100644)

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Example script: trains a PrefixSpan model on a small in-memory collection
# of sequences and prints every frequent sequence with its frequency.
# The regions between "$example on$" and "$example off$" are the parts pulled
# into the documentation by the include_example tag.

# $example on$
from pyspark.mllib.fpm import PrefixSpan
# $example off$
from pyspark import SparkContext

if __name__ == "__main__":
    sc = SparkContext(appName="PythonPrefixSpanExample")

    # $example on$
    # Four input sequences; each one is a list of item lists.
    sequences = sc.parallelize([
        [[1, 2], [3]],
        [[1], [3, 2], [1, 2]],
        [[1, 2], [5]],
        [[6]],
    ])

    model = PrefixSpan.train(sequences, minSupport=0.5, maxPatternLength=5)
    result = model.freqSequences().collect()
    for fs in result:
        print('{}, {}'.format(fs.sequence, fs.freq))
    # $example off$

    # Shut the context down explicitly once the example has finished.
    sc.stop()