Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

small code cleans. #337

Merged
merged 1 commit into from
Sep 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ctr/avazu_data_processer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-import os
import sys
import csv
import cPickle
Expand Down
2 changes: 0 additions & 2 deletions ctr/infer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import gzip
import argparse
import itertools
Expand Down
2 changes: 0 additions & 2 deletions ctr/network_conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
from paddle.v2 import layer
from paddle.v2 import data_type as dtype
Expand Down
2 changes: 0 additions & 2 deletions ctr/train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-import os
import argparse
import gzip

Expand Down
27 changes: 14 additions & 13 deletions dssm/infer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import itertools

Expand Down Expand Up @@ -32,9 +30,10 @@
type=int,
required=True,
default=ModelType.CLASSIFICATION_MODE,
help="model type, %d for classification, %d for pairwise rank, %d for regression (default: classification)"
% (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
ModelType.REGRESSION_MODE))
help=("model type, %d for classification, %d for pairwise rank, "
"%d for regression (default: classification)") %
(ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
ModelType.REGRESSION_MODE))
parser.add_argument(
'-s',
'--source_dic_path',
Expand All @@ -45,8 +44,8 @@
'--target_dic_path',
type=str,
required=False,
help="path of the target's word dic, if not set, the `source_dic_path` will be used"
)
help=("path of the target's word dictionary, "
"if not set, the `source_dic_path` will be used"))
parser.add_argument(
'-a',
'--model_arch',
Expand All @@ -69,8 +68,9 @@
'--dnn_dims',
type=str,
default='256,128,64,32',
help="dimentions of dnn layers, default is '256,128,64,32', which means create a 4-layer dnn, demention of each layer is 256, 128, 64 and 32"
)
help=("dimentions of dnn layers, default is '256,128,64,32', "
"which means create a 4-layer dnn, "
"demention of each layer is 256, 128, 64 and 32"))
parser.add_argument(
'-c',
'--class_num',
Expand All @@ -85,7 +85,8 @@
assert args.class_num > 1, "--class_num should be set in classification task."

layer_dims = map(int, args.dnn_dims.split(','))
args.target_dic_path = args.source_dic_path if not args.target_dic_path else args.target_dic_path
args.target_dic_path = args.source_dic_path if not args.target_dic_path \
else args.target_dic_path

paddle.init(use_gpu=False, trainer_count=1)

Expand Down Expand Up @@ -130,9 +131,9 @@ def infer(self, data_path):
for id, batch in enumerate(infer_reader()):
res = self.inferer.infer(input=batch)
predictions = [' '.join(map(str, x)) for x in res]
assert len(batch) == len(
predictions), "predict error, %d inputs, but %d predictions" % (
len(batch), len(predictions))
assert len(batch) == len(predictions), (
"predict error, %d inputs, "
"but %d predictions") % (len(batch), len(predictions))
output_f.write('\n'.join(map(str, predictions)) + '\n')


Expand Down
15 changes: 9 additions & 6 deletions dssm/network_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def __init__(self,
@class_num: int
number of categories.
'''
assert len(
vocab_sizes
) == 2, "vocab_sizes specify the sizes left and right inputs, and dim should be 2."
assert len(vocab_sizes) == 2, (
"vocab_sizes specify the sizes left and right inputs, "
"and dim should be 2.")
assert len(dnn_dims) > 1, "more than two layers is needed."

self.dnn_dims = dnn_dims
Expand Down Expand Up @@ -91,7 +91,8 @@ def create_fc(self, emb, prefix=''):
@emb: paddle.layer
output of the embedding layer
@prefix: str
prefix of layers' names, used to share parameters between more than one `fc` parts.
prefix of layers' names, used to share parameters between
more than one `fc` parts.
'''
_input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max())
Expand All @@ -113,7 +114,8 @@ def create_cnn(self, emb, prefix=''):
@emb: paddle.layer
output of the embedding layer
@prefix: str
prefix of layers' names, used to share parameters between more than one `cnn` parts.
prefix of layers' names, used to share parameters between
more than one `cnn` parts.
'''

def create_conv(context_len, hidden_size, prefix):
Expand Down Expand Up @@ -174,7 +176,8 @@ def _build_rank_model(self):
- source sentence
- left_target sentence
- right_target sentence
- label, 1 if left_target should be sorted in front of right_target, otherwise 0.
- label, 1 if left_target should be sorted in front of
right_target, otherwise 0.
'''
logger.info("build rank model")
assert self.model_type.is_rank()
Expand Down
6 changes: 2 additions & 4 deletions dssm/reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from utils import UNK, ModelType, TaskType, load_dic, sent2ids, logger, ModelType
from utils import UNK, ModelType, TaskType, load_dic, \
sent2ids, logger, ModelType


class Dataset(object):
Expand Down Expand Up @@ -38,7 +37,6 @@ def test(self):
'''
Load testset.
'''
# logger.info("[reader] load testset from %s" % self.test_path)
with open(self.test_path) as f:
for line_id, line in enumerate(f):
yield self.record_reader(line)
Expand Down
12 changes: 6 additions & 6 deletions dssm/train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse

import paddle.v2 as paddle
Expand Down Expand Up @@ -31,8 +29,8 @@
'--target_dic_path',
type=str,
required=False,
help="path of the target's word dic, if not set, the `source_dic_path` will be used"
)
help=("path of the target's word dictionary, "
"if not set, the `source_dic_path` will be used"))
parser.add_argument(
'-b',
'--batch_size',
Expand Down Expand Up @@ -221,7 +219,8 @@ def _event_handler(event):
event.pass_id, event.batch_id, event.cost, event.metrics))

# test model
if event.batch_id > 0 and event.batch_id % args.num_batches_to_test == 0:
if event.batch_id > 0 and \
event.batch_id % args.num_batches_to_test == 0:
if test_reader is not None:
if model_type.is_classification():
result = trainer.test(
Expand All @@ -231,7 +230,8 @@ def _event_handler(event):
else:
result = None
# save model
if event.batch_id > 0 and event.batch_id % args.num_batches_to_save_model == 0:
if event.batch_id > 0 and \
event.batch_id % args.num_batches_to_save_model == 0:
model_desc = "{type}_{arch}".format(
type=str(args.model_type), arch=str(args.model_arch))
with open("%sdssm_%s_pass_%05d.tar" %
Expand Down
1 change: 1 addition & 0 deletions generate_chinese_poetry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[TBD]
65 changes: 65 additions & 0 deletions generate_chinese_poetry/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

<html>
<head>
<script type="text/x-mathjax-config">
// Configure MathJax before the library itself is loaded by the async
// <script> tag that follows this block.
MathJax.Hub.Config({
// Extensions to load: the tex2jax preprocessor plus the AMS symbol/math
// TeX packages.
extensions: ["tex2jax.js", "TeX/AMSsymbols.js", "TeX/AMSmath.js"],
// Parse TeX input and render it with the HTML-CSS output processor.
jax: ["input/TeX", "output/HTML-CSS"],
tex2jax: {
// Delimiters recognized in the page text: $...$ for inline math and
// $$...$$ for display math.
inlineMath: [ ['$','$'] ],
displayMath: [ ['$$','$$'] ],
// NOTE(review): per the MathJax tex2jax docs this should let "\$" be
// written for a literal dollar sign -- confirm against the MathJax
// version pinned below.
processEscapes: true
},
// Restrict the HTML-CSS renderer to the TeX font set.
"HTML-CSS": { availableFonts: ["TeX"] }
});
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js" async></script>
<script type="text/javascript" src="../.tools/theme/marked.js">
</script>
<link href="http://cdn.bootcss.com/highlight.js/9.9.0/styles/darcula.min.css" rel="stylesheet">
<script src="http://cdn.bootcss.com/highlight.js/9.9.0/highlight.min.js"></script>
<link href="http://cdn.bootcss.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" rel="stylesheet">
<link href="../.tools/theme/github-markdown.css" rel='stylesheet'>
<style type="text/css" >
.markdown-body {
box-sizing: border-box;
min-width: 200px;
max-width: 980px;
margin: 0 auto;
padding: 45px;
}
</style>
</head>


<body>

<div id="context" class="container-fluid markdown-body">
</div>

<!-- This block will be replaced by each markdown file content. Please do not change lines below.-->
<div id="markdown" style='display:none'>
[TBD]

</div>
<!-- You can change the lines below now. -->

<script type="text/javascript">
// Configure the `marked` markdown renderer, then render the hidden
// #markdown element's content into the visible #context container.
marked.setOptions({
  renderer: new marked.Renderer(),
  gfm: true,
  breaks: false,
  smartypants: true,
  // Syntax-highlight fenced code blocks with highlight.js.
  //   code: text of the code block as extracted by marked
  //   lang: language tag of the fence (may be undefined)
  // Returns the highlighted HTML string.
  highlight: function(code, lang) {
    // The markdown source is read back through innerHTML (see below), so
    // special characters inside code blocks arrive as HTML entities and
    // must be decoded before highlighting.
    code = code.replace(/&gt;/g, ">");
    code = code.replace(/&lt;/g, "<");
    code = code.replace(/&nbsp;/g, " ");
    // Decode "&amp;" LAST. Decoding it first would turn an escaped entity
    // such as "&amp;lt;" into "&lt;" and then into "<", i.e. decode it
    // twice and corrupt code that contains literal entity text.
    code = code.replace(/&amp;/g, "&");
    return hljs.highlightAuto(code, [lang]).value;
  }
});
document.getElementById("context").innerHTML = marked(
  document.getElementById("markdown").innerHTML);
</script>
</body>
</html>
2 changes: 0 additions & 2 deletions hsigmoid/infer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import gzip
Expand Down
4 changes: 1 addition & 3 deletions hsigmoid/network_conf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import math

import paddle.v2 as paddle


Expand Down
2 changes: 0 additions & 2 deletions hsigmoid/train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import gzip
Expand Down
Empty file modified image_classification/train.py
100755 → 100644
Empty file.
24 changes: 15 additions & 9 deletions ltr/lambda_rank.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
import os, sys
import os
import sys
import gzip
import paddle.v2 as paddle
import numpy as np
import functools
import argparse
import numpy as np

import paddle.v2 as paddle


def lambda_rank(input_dim):
"""
lambda_rank is a Listwise rank model, the input data and label must be sequences.
lambda_rank is a Listwise rank model, the input data and label
must be sequences.

https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters :
input_dim, one document's dense feature vector dimension

format of the dense_vector_sequence:
[[f, ...], [f, ...], ...], f is a float or an int number
"""

label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1))
data = paddle.layer.data("data",
Expand Down Expand Up @@ -88,11 +93,11 @@ def event_handler(event):


def lambda_rank_infer(pass_id):
"""lambda_rank model inference interface

parameters:
pass_id : inference model in pass_id
"""
lambda_rank model inference interface
parameters:
pass_id : inference model in pass_id
"""
print "Begin to Infer..."
input_dim = 46
output = lambda_rank(input_dim)
Expand All @@ -109,7 +114,8 @@ def lambda_rank_infer(pass_id):
if len(infer_data) == infer_data_num:
break

# predict score of infer_data document. Re-sort the document base on predict score
# Predict a score for each document in infer_data, then re-sort the
# documents by predicted score in descending order to build the
# final ranking.
predicitons = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
Expand Down
1 change: 0 additions & 1 deletion ltr/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def ndcg(score_list):
e.g. predict rank score list :
>>> scores = [3, 2, 3, 0, 1, 2]
>>> ndcg_score = ndcg(scores)

"""

def dcg(score_list):
Expand Down
19 changes: 11 additions & 8 deletions ltr/ranknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@

def half_ranknet(name_prefix, input_dim):
"""
parameter in same name will be shared in paddle framework,
these parameters in ranknet can be used in shared state, e.g. left network and right network
shared parameters in detail
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
"""
parameter in same name will be shared in paddle framework,
these parameters in ranknet can be used in shared state,
e.g. left network and right network shared parameters in detail
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
"""
# data layer
data = paddle.layer.data(name_prefix + "/data",
paddle.data_type.dense_vector(input_dim))
Expand Down Expand Up @@ -102,12 +102,14 @@ def ranknet_infer(pass_id):
print "Begin to Infer..."
feature_dim = 46

# we just need half_ranknet to predict a rank score, which can be used in sort documents
# we just need half_ranknet to predict a rank score,
# which can be used in sort documents
output = half_ranknet("infer", feature_dim)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open("ranknet_params_%d.tar.gz" % (pass_id)))

# load data of same query and relevance documents, need ranknet to rank these candidates
# load data of same query and relevance documents,
# need ranknet to rank these candidates
infer_query_id = []
infer_data = []
infer_doc_index = []
Expand All @@ -121,7 +123,8 @@ def ranknet_infer(pass_id):
infer_query_id.append(query_id)
infer_data.append([feature_vector])

# predict score of infer_data document. Re-sort the document base on predict score
# Predict a score for each document in infer_data, then re-sort the
# documents by predicted score in descending order to build the
# final ranking.
scores = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
Expand Down
2 changes: 1 addition & 1 deletion mt_with_external_memory/external_memory.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ExternalMemory(object):
Besides, the ExternalMemory class must be used together with
paddle.layer.recurrent_group (within its step function). It can never be
used in a standalone manner.

For more details, please refer to
`Neural Turing Machines <https://arxiv.org/abs/1410.5401>`_.

Expand Down
2 changes: 1 addition & 1 deletion mt_with_external_memory/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
"""
Contains model configuration for external-memory-enhanced seq2seq.

The "external memory" refers to two types of memories.
Expand Down
Loading