Commit

Merge branch 'release/1.3.0'
lukostaz committed Mar 9, 2020
2 parents 07ce70f + 1885276 commit cf7d62d
Showing 42 changed files with 3,510 additions and 777 deletions.
37 changes: 31 additions & 6 deletions .circleci/config.yml
@@ -76,7 +76,7 @@ jobs:
       command: |
         . venv/bin/activate
         venv/bin/python3 -m pip install flake8
-        venv/bin/python3 -m flake8 ampligraph --max-line-length 120 --ignore=W291,W293
+        venv/bin/python3 -m flake8 ampligraph --max-line-length 120 --ignore=W291,W293,W503
docs:
docker:
@@ -99,8 +99,33 @@ workflows:
   version: 2
   checks:
     jobs:
-      - build
-      - pip-check
-      - lint
-      - docs
-      - test
+      - build:
+          filters:
+            branches:
+              only:
+                - master
+                - develop
+      - pip-check:
+          filters:
+            branches:
+              only:
+                - master
+                - develop
+      - lint:
+          filters:
+            branches:
+              only:
+                - master
+                - develop
+      - docs:
+          filters:
+            branches:
+              only:
+                - master
+                - develop
+      - test:
+          filters:
+            branches:
+              only:
+                - master
+                - develop
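The newly ignored W503 warning ("line break before binary operator") fires on wrapped expressions like the one below; a minimal illustration of the style the CI lint step now accepts, which post-2016 PEP 8 in fact recommends:

```python
# W503 flags a line break *before* a binary operator, as in this wrapped sum.
# With --ignore=W291,W293,W503 flake8 no longer rejects this layout.
total = (1
         + 2
         + 3)
print(total)  # 6
```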
21 changes: 21 additions & 0 deletions .readthedocs.yml
@@ -0,0 +1,21 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# requirements_readthedocs.txt is only used by readthedocs and includes tensorflow, which is imported
+# in the root __init__.py since d7fbb98 to suppress tf 1.x deprecation warnings.
+# This causes readthedocs.io builds to fail.
+
+version: 2
+
+sphinx:
+  configuration: docs/conf.py
+
+formats: all
+
+python:
+  version: 3.7
+  install:
+    - requirements: docs/requirements_readthedocs.txt
+    - method: setuptools
+      path: .
+  system_packages: true
20 changes: 11 additions & 9 deletions README.md
@@ -43,7 +43,7 @@ It then combines embeddings with model-specific scoring functions to predict uns
AmpliGraph includes the following submodules:

* **Datasets**: helper functions to load datasets (knowledge graphs).
-* **Models**: knowledge graph embedding models. AmpliGraph contains **TransE**, **DistMult**, **ComplEx**, **HolE**, **ConvKB**. (More to come!)
+* **Models**: knowledge graph embedding models. AmpliGraph contains **TransE**, **DistMult**, **ComplEx**, **HolE**, **ConvE**, **ConvKB**. (More to come!)
* **Evaluation**: metrics and evaluation protocols to assess the predictive power of the models.
* **Discovery**: High-level convenience APIs for knowledge discovery (discover new facts, cluster entities, predict near duplicates).

@@ -52,15 +52,15 @@ AmpliGraph includes the following submodules:

### Prerequisites

-* Linux Box
+* Linux, macOS, Windows
* Python ≥ 3.6

#### Provision a Virtual Environment

Create and activate a virtual environment (conda)

```
-conda create --name ampligraph python=3.6
+conda create --name ampligraph python=3.7
source activate ampligraph
```

@@ -72,21 +72,21 @@ Install from pip or conda:
**CPU-only**

```
-pip install "tensorflow>=1.13.1,<2.0"
+pip install "tensorflow>=1.14.0,<2.0"
 or
-conda install tensorflow=1.13.1
+conda install tensorflow'>=1.14.0,<2.0.0'
```

**GPU support**

```
-pip install "tensorflow-gpu>=1.13.1,<2.0"
+pip install "tensorflow-gpu>=1.14.0,<2.0"
 or
-conda install tensorflow-gpu=1.13.1
+conda install tensorflow-gpu'>=1.14.0,<2.0.0'
```


@@ -116,13 +116,13 @@ pip install -e .
```python
>> import ampligraph
>> ampligraph.__version__
-'1.2.0'
+'1.3.0'
```


## Predictive Power Evaluation (MRR Filtered)

-AmpliGraph includes implementations of TransE, DistMult, ComplEx, HolE and ConvKB.
+AmpliGraph includes implementations of TransE, DistMult, ComplEx, HolE, ConvE, and ConvKB.
Their predictive power is reported below and compared against the state-of-the-art results in literature.
[More details available here](https://docs.ampligraph.org/en/latest/experiments.html).

@@ -133,6 +133,8 @@ Their predictive power is reported below and compared against the state-of-the-a
| DistMult (AmpliGraph) | 0.31 | 0.47 | 0.50 | 0.78 | 0.82 |
| ComplEx (AmpliGraph) | 0.32 | **0.51**| 0.49 | 0.80 | 0.94 |
| HolE (AmpliGraph) | 0.31 | 0.47 | 0.50 | 0.80 | 0.94 |
+| ConvE (AmpliGraph) | 0.26 | 0.45 | 0.30 | 0.50 | 0.93 |
+| ConvE (1-N, AmpliGraph) | 0.32 | 0.48 | 0.40 | 0.80 | **0.95** |
| ConvKB (AmpliGraph) | 0.23 | 0.39 | 0.30 | 0.65 | 0.80 |

<sub>
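As a hypothetical illustration (not AmpliGraph's implementation), the filtered MRR reported in the table is the mean of the reciprocal ranks of the true test triples, where corrupted triples that also appear elsewhere in the graph are removed from the candidate list before ranking:

```python
# Hypothetical sketch of the filtered-MRR metric. `ranks` would come from
# scoring each test triple against its corruptions, after filtering out
# corruptions that are themselves true triples.
ranks = [1, 3, 2, 10]  # example ranks of four true triples
mrr = sum(1.0 / r for r in ranks) / len(ranks)
print(round(mrr, 2))  # 0.48
```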
7 changes: 5 additions & 2 deletions ampligraph/__init__.py
@@ -5,11 +5,14 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-"""Explainable Link Prediction is a library for relational learning on knowledge graphs."""
+"""AmpliGraph is a library for relational learning on knowledge graphs."""
import logging.config
import pkg_resources

-__version__ = '1.2.0'
+import tensorflow as tf
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+__version__ = '1.3.0'
__all__ = ['datasets', 'latent_features', 'discovery', 'evaluation', 'utils']

logging.config.fileConfig(pkg_resources.resource_filename(__name__, 'logger.conf'), disable_existing_loggers=False)
4 changes: 2 additions & 2 deletions ampligraph/datasets/__init__.py
@@ -13,8 +13,8 @@
from .abstract_dataset_adapter import AmpligraphDatasetAdapter
from .sqlite_adapter import SQLiteAdapter
from .numpy_adapter import NumpyDatasetAdapter

+from .oneton_adapter import OneToNDatasetAdapter

__all__ = ['load_from_csv', 'load_from_rdf', 'load_from_ntriples', 'load_wn18', 'load_fb15k',
'load_fb15k_237', 'load_yago3_10', 'load_wn18rr', 'load_wn11', 'load_fb13',
-           'AmpligraphDatasetAdapter', 'NumpyDatasetAdapter', 'SQLiteAdapter']
+           'AmpligraphDatasetAdapter', 'NumpyDatasetAdapter', 'SQLiteAdapter', 'OneToNDatasetAdapter']
45 changes: 30 additions & 15 deletions ampligraph/datasets/abstract_dataset_adapter.py
@@ -9,14 +9,14 @@ def __init__(self):
"""Initialize the class variables
"""
self.dataset = {}

# relation to idx mappings
self.rel_to_idx = {}
# entities to idx mappings
self.ent_to_idx = {}
# Mapped status of each dataset
self.mapped_status = {}

def use_mappings(self, rel_to_idx, ent_to_idx):
"""Use an existing mapping with the datasource.
"""
@@ -25,14 +25,14 @@ def use_mappings(self, rel_to_idx, ent_to_idx):
# set the mapped status to false, since we are changing the dictionary
for key in self.dataset.keys():
self.mapped_status[key] = False

def generate_mappings(self, use_all=False):
"""Generate mappings, either from the train set only or from the entire dataset
Parameters
----------
use_all : boolean
If True, it generates mapping from all the data. If False, it only uses training set to generate mappings
Returns
-------
rel_to_idx : dictionary
@@ -41,36 +41,51 @@
entity to idx mapping dictionary
"""
raise NotImplementedError('Abstract Method not implemented!')

def get_size(self, dataset_type="train"):
"""Returns the size of the specified dataset
Parameters
----------
dataset_type : string
type of the dataset
Returns
-------
size : int
size of the specified dataset
"""

raise NotImplementedError('Abstract Method not implemented!')


def data_exists(self, dataset_type="train"):
"""Checks if a dataset_type exists in the adapter.
Parameters
----------
dataset_type : string
type of the dataset
Returns
-------
exists : bool
Boolean indicating if dataset_type exists in the adapter.
"""

raise NotImplementedError('Abstract Method not implemented!')

def set_data(self, dataset, dataset_type=None, mapped_status=False):
"""set the dataset based on the type
Parameters
----------
dataset : nd-array or dictionary
            dataset of triples
dataset_type : string
            if the dataset parameter is an nd-array, this indicates the type of the data being passed
mapped_status : bool
indicates whether the data has already been mapped to the indices
"""
raise NotImplementedError('Abstract Method not implemented!')

def map_data(self, remap=False):
"""map the data to the mappings of ent_to_idx and rel_to_idx
Parameters
@@ -79,7 +94,7 @@ def map_data(self, remap=False):
remap the data, if already mapped. One would do this if the dictionary is updated.
"""
raise NotImplementedError('Abstract Method not implemented!')

def set_filter(self, filter_triples):
"""Sets the filter that needs to be used while generating evaluation batches
Parameters
@@ -88,10 +103,10 @@
triples that would be used as filter
"""
raise NotImplementedError('Abstract Method not implemented!')

def get_next_batch(self, batches_count=-1, dataset_type="train", use_filter=False):
"""Generator that returns the next batch of data.
Parameters
----------
dataset_type: string
@@ -111,7 +126,7 @@ def get_next_batch(self, batches_count=-1, dataset_type="train", use_filter=False):
all subjects that were involved in the ?-p-o relation. This is returned only if use_filter is set to true.
"""
raise NotImplementedError('Abstract Method not implemented!')

def cleanup(self):
"""Cleans up the internal state
"""
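The abstract interface above can be made concrete by an in-memory adapter. The sketch below is hypothetical: it mirrors the method names shown in the diff but is not AmpliGraph's actual NumpyDatasetAdapter or SQLiteAdapter.

```python
# Hypothetical in-memory adapter; method names follow the abstract interface
# above, but this is NOT AmpliGraph's NumpyDatasetAdapter.
class ToyDatasetAdapter:
    def __init__(self):
        self.dataset = {}        # split name -> list of (s, p, o) triples
        self.rel_to_idx = {}
        self.ent_to_idx = {}
        self.mapped_status = {}  # split name -> has it been index-mapped?

    def set_data(self, dataset, dataset_type="train", mapped_status=False):
        self.dataset[dataset_type] = list(dataset)
        self.mapped_status[dataset_type] = mapped_status

    def generate_mappings(self, use_all=False):
        # Build entity/relation -> integer-index dictionaries, from the train
        # split only or (use_all=True) from every split.
        splits = self.dataset.values() if use_all else [self.dataset["train"]]
        ents, rels = set(), set()
        for triples in splits:
            for s, p, o in triples:
                ents.update((s, o))
                rels.add(p)
        self.ent_to_idx = {e: i for i, e in enumerate(sorted(ents))}
        self.rel_to_idx = {r: i for i, r in enumerate(sorted(rels))}
        return self.rel_to_idx, self.ent_to_idx

    def get_size(self, dataset_type="train"):
        return len(self.dataset[dataset_type])

    def data_exists(self, dataset_type="train"):
        return dataset_type in self.dataset

adapter = ToyDatasetAdapter()
adapter.set_data([("a", "likes", "b"), ("b", "likes", "c")], "train")
adapter.generate_mappings()
print(adapter.get_size("train"), adapter.data_exists("test"))  # 2 False
```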
