diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index 0f44c62ad51..a537741458f 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -77,6 +77,14 @@ --> + + zeppelin.notebook.storage org.apache.zeppelin.notebook.repo.VFSNotebookRepo diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index 73e856a4fa3..0c79af226a0 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -65,7 +65,8 @@
  • Notebook as Homepage
  • -
  • S3 Storage
  • +
  • Git Storage
  • +
  • S3 Storage
  • Interpreter API
  • diff --git a/docs/storage/storage.md b/docs/storage/storage.md index a04a703006b..bcfb8371c65 100644 --- a/docs/storage/storage.md +++ b/docs/storage/storage.md @@ -19,10 +19,30 @@ limitations under the License. --> ### Notebook Storage -In Zeppelin there are two option for storage Notebook, by default the notebook is storage in the notebook folder in your local File System and the second option is S3. +Zeppelin has a pluggable notebook storage mechanism controlled by `zeppelin.notebook.storage` configuration option with multiple implementations. +There are few Notebook storages avaialble for a use out of the box: + - (default) all notes are saved in the notebook folder in your local File System - `VFSNotebookRepo` + - there is also an option to version it using local Git repository - `GitNotebookRepo` + - another option is Amazon S3 service - `S3NotebookRepo` + +Multiple storages can be used at the same time by providing a comma-separated list of the calss-names in the confiruration. +By default, only first two of them will be automatically kept in sync by Zeppelin. + +
    +#### Notebook Storage in local Git repository + +To enable versioning for all your local notebooks though a standard Git repository - uncomment the next property in `zeppelin-site.xml` in order to use GitNotebookRepo class: + +``` + + zeppelin.notebook.storage + org.apache.zeppelin.notebook.repo.GitNotebookRepo + notebook persistence layer implementation + +```
    -#### Notebook Storage in S3 +#### Notebook Storage in S3 For notebook storage in S3 you need the AWS credentials, for this there are three options, the enviroment variable ```AWS_ACCESS_KEY_ID``` and ```AWS_ACCESS_SECRET_KEY```, credentials file in the folder .aws in you home and IAM role for your instance. For complete the need steps is necessary: diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE index e043b90584a..cc08f98f4a7 100644 --- a/zeppelin-distribution/src/bin_license/LICENSE +++ b/zeppelin-distribution/src/bin_license/LICENSE @@ -1,7 +1,7 @@ (Apache 2.0) nvd3.js v1.1.15-beta (http://nvd3.org/) - https://github.com/novus/nvd3/blob/v1.1.15-beta/LICENSE.md (Apache 2.0) gson v2.2 (com.google.code.gson:gson:jar:2.2 - https://github.com/google/gson) - https://github.com/google/gson/blob/gson-2.2/LICENSE (Apache 2.0) Amazon Web Services SDK for Java v1.10.1 (https://aws.amazon.com/sdk-for-java/) - https://raw.githubusercontent.com/aws/aws-sdk-java/1.10.1/LICENSE.txt - + (Apache 2.0) JavaEWAH v0.7.9 (https://github.com/lemire/javaewah) - https://github.com/lemire/javaewah/blob/master/LICENSE-2.0.txt The following components are provided under Apache License. @@ -115,13 +115,15 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (BSD 3 Clause) d3 v2.10.2 (https://d3js.org/) - https://github.com/mbostock/d3/blob/v2.10.2/LICENSE (BSD 3 Clause) ace-builds v1.1.9 (https://github.com/ajaxorg/ace-builds) - https://github.com/ajaxorg/ace-builds/blob/v1.1.9/LICENSE (BSD 3 Clause) Ace v1.1.9 (http://ace.c9.io/) - https://github.com/ajaxorg/ace/blob/v1.1.9/LICENSE - (BSD Style) dom4j v1.6.1 (http://www.dom4j.org) - https://github.com/dom4j/dom4j/blob/dom4j_1_6_1/LICENSE.txt + (BSD Style) dom4j v1.6.1 (http://www.dom4j.org) - https://github.com/dom4j/dom4j/blob/dom4j_1_6_1/LICENSE.txt + (BSD Style) JSch v0.1.53 (http://www.jcraft.com) - http://www.jcraft.com/jsch/LICENSE.txt (BSD 3 Clause) highlightjs v8.4.0 (https://highlightjs.org/) - https://github.com/isagalaev/highlight.js/blob/8.4/LICENSE The following components are provided under the BSD-style License. + (New BSD License) JGit (org.eclipse.jgit:org.eclipse.jgit:jar:4.1.1.201511131810-r - https://eclipse.org/jgit/) (New BSD License) Kryo (com.esotericsoftware.kryo:kryo:2.21 - http://code.google.com/p/kryo/) (New BSD License) MinLog (com.esotericsoftware.minlog:minlog:1.2 - http://code.google.com/p/minlog/) (New BSD License) ReflectASM (com.esotericsoftware.reflectasm:reflectasm:1.07 - http://code.google.com/p/reflectasm/) @@ -155,7 +157,7 @@ EPL license The following components are provided under the EPL License. (EPL 1.0) Aether (org.sonatype.aether - http://www.eclipse.org/aether/) - + (EPL 1.0) JDT Annotations For Enhanced Null Analysis (org.eclipse.jdt:org.eclipse.jdt.annotation:1.1.0 - https://repo.eclipse.org/content/repositories/eclipse-releases/org/eclipse/jdt/org.eclipse.jdt.annotation) ======================================================================== diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java index ea8a0b64c45..46e6f92e657 100644 --- a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java +++ b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java @@ -19,14 +19,12 @@ import java.io.File; import java.io.IOException; -import java.lang.reflect.Constructor; import java.util.EnumSet; import java.util.HashSet; import java.util.Set; import javax.net.ssl.SSLContext; import javax.servlet.DispatcherType; -import javax.servlet.Servlet; import javax.ws.rs.core.Application; import org.apache.cxf.jaxrs.servlet.CXFNonSpringJaxrsServlet; @@ -60,8 +58,6 @@ /** * Main class of Zeppelin. * - * @author Leemoonsoo - * */ public class ZeppelinServer extends Application { @@ -69,13 +65,10 @@ public class ZeppelinServer extends Application { private SchedulerFactory schedulerFactory; public static Notebook notebook; - public static NotebookServer notebookServer; - public static Server jettyServer; private InterpreterFactory replFactory; - private NotebookRepo notebookRepo; public static void main(String[] args) throws Exception { @@ -113,6 +106,7 @@ public static void main(String[] args) throws Exception { try { jettyServer.stop(); ZeppelinServer.notebook.getInterpreterFactory().close(); + ZeppelinServer.notebook.close(); } catch (Exception e) { LOG.error("Error while stopping servlet container", e); } diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml index e4310891ff0..e9de7482078 100644 --- a/zeppelin-zengine/pom.xml +++ b/zeppelin-zengine/pom.xml @@ -51,13 +51,13 @@ zeppelin-interpreter ${project.version} - + com.amazonaws aws-java-sdk-s3 1.10.1 - + org.slf4j slf4j-api @@ -123,12 +123,6 @@ guava - - junit - junit - test - - org.reflections reflections @@ -151,11 +145,31 @@ 1.4.01 + + org.eclipse.jgit + org.eclipse.jgit + 4.1.1.201511131810-r + + + + junit + junit + test + + org.mockito mockito-all 1.9.0 test - + + + + com.google.truth + truth + 0.27 + test + + diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java index 66243103e27..481f70838b0 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java @@ -275,7 +275,7 @@ private Note loadNoteFromRepo(String id) { String noteId = snapshot.getAngularObject().getNoteId(); // at this point, remote interpreter process is not created. // so does not make sense add it to the remote. - // + // // therefore instead of addAndNotifyRemoteProcess(), need to use add() // that results add angularObject only in ZeppelinServer side not remoteProcessSide registry.add(name, snapshot.getAngularObject().get(), noteId); @@ -457,4 +457,8 @@ public ZeppelinConfiguration getConf() { return conf; } + public void close() { + this.notebookRepo.close(); + } + } diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GitNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GitNotebookRepo.java new file mode 100644 index 00000000000..fe4975353eb --- /dev/null +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GitNotebookRepo.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.notebook.repo; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.zeppelin.conf.ZeppelinConfiguration; +import org.apache.zeppelin.notebook.Note; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.dircache.DirCache; +import org.eclipse.jgit.internal.storage.file.FileRepository; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; + +/** + * NotebookRepo that hosts all the notebook FS in a single Git repo + * + * This impl intended to be simple and straightforward: + * - does not handle branches + * - only basic local git file repo, no remote Github push\pull yet + * + * TODO(bzz): add default .gitignore + */ +public class GitNotebookRepo extends VFSNotebookRepo implements NotebookRepoVersioned { + private static final Logger LOG = LoggerFactory.getLogger(GitNotebookRepo.class); + + private String localPath; + private Git git; + + public GitNotebookRepo(ZeppelinConfiguration conf) throws IOException { + super(conf); + localPath = getRootDir().getName().getPath(); + LOG.info("Opening a git repo at '{}'", localPath); + Repository localRepo = new FileRepository(Joiner.on(File.separator).join(localPath, ".git")); + if (!localRepo.getDirectory().exists()) { + LOG.info("Git repo {} does not exist, creating a new one", localRepo.getDirectory()); + localRepo.create(); + } + git = new Git(localRepo); + maybeAddAndCommit("."); + } + + @Override + public synchronized void save(Note note) throws IOException { + super.save(note); + maybeAddAndCommit(note.getId()); + } + + private void maybeAddAndCommit(String pattern) { + try { + List gitDiff = git.diff().call(); + if (!gitDiff.isEmpty()) { + LOG.debug("Changes found for pattern '{}': {}", pattern, gitDiff); + DirCache added = git.add().addFilepattern(pattern).call(); + LOG.debug("{} changes are about to be commited", added.getEntryCount()); + git.commit().setMessage("Updated " + pattern).call(); + } else { + LOG.debug("No changes found {}", pattern); + } + } catch (GitAPIException e) { + LOG.error("Faild to add+comit {} to Git", pattern, e); + } + } + + @Override + public Note get(String noteId, String rev) throws IOException { + //TODO(bzz): something like 'git checkout rev', that will not change-the-world though + return super.get(noteId); + } + + @Override + public List history(String noteId) { + List history = Lists.newArrayList(); + LOG.debug("Listing history for {}:", noteId); + try { + Iterable logs = git.log().addPath(noteId).call(); + for (RevCommit log: logs) { + history.add(new Rev(log.getName(), log.getCommitTime())); + LOG.debug(" - ({},{})", log.getName(), log.getCommitTime()); + } + } catch (GitAPIException e) { + LOG.error("Failed to get logs for {}", noteId, e); + } + return history; + } + + @Override + public void close() { + git.getRepository().close(); + } + + //DI replacements for Tests + Git getGit() { + return git; + } + + void setGit(Git git) { + this.git = git; + } + + +} diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepo.java index 07e08758ee4..f8e0b57fa4b 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepo.java @@ -31,4 +31,9 @@ public interface NotebookRepo { public Note get(String noteId) throws IOException; public void save(Note note) throws IOException; public void remove(String noteId) throws IOException; + + /** + * Release any underlying resources + */ + public void close(); } diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoSync.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoSync.java index 49f40fcdd70..08156c7b0ed 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoSync.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoSync.java @@ -36,14 +36,15 @@ /** * Notebook repository sync with remote storage */ -public class NotebookRepoSync implements NotebookRepo{ - private List repos = new ArrayList(); +public class NotebookRepoSync implements NotebookRepo { private static final Logger LOG = LoggerFactory.getLogger(NotebookRepoSync.class); private static final int maxRepoNum = 2; private static final String pushKey = "pushNoteIDs"; private static final String pullKey = "pullNoteIDs"; private static ZeppelinConfiguration config; + private List repos = new ArrayList(); + /** * @param (conf) * @throws - Exception @@ -51,18 +52,19 @@ public class NotebookRepoSync implements NotebookRepo{ public NotebookRepoSync(ZeppelinConfiguration conf) throws Exception { config = conf; - + String allStorageClassNames = conf.getString(ConfVars.ZEPPELIN_NOTEBOOK_STORAGE).trim(); if (allStorageClassNames.isEmpty()) { throw new IOException("Empty ZEPPELIN_NOTEBOOK_STORAGE conf parameter"); } String[] storageClassNames = allStorageClassNames.split(","); if (storageClassNames.length > getMaxRepoNum()) { - throw new IOException("Unsupported number of storage classes (" + + throw new IOException("Unsupported number of storage classes (" + storageClassNames.length + ") in ZEPPELIN_NOTEBOOK_STORAGE"); } for (int i = 0; i < storageClassNames.length; i++) { + @SuppressWarnings("static-access") Class notebookStorageClass = getClass().forName(storageClassNames[i].trim()); Constructor constructor = notebookStorageClass.getConstructor( ZeppelinConfiguration.class); @@ -73,20 +75,26 @@ public NotebookRepoSync(ZeppelinConfiguration conf) throws Exception { } } - /* by default lists from first repository */ + /** + * Lists Notebooks from the first repository + */ + @Override public List list() throws IOException { if (config.getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_RELOAD_FROM_STORAGE) && getRepoCount() > 1) { sync(0, 1); } return getRepo(0).list(); } - + /* list from specific repo (for tests) */ List list(int repoIndex) throws IOException { return getRepo(repoIndex).list(); } - /* by default returns from first repository */ + /** + * Returns from Notebook from the first repository + */ + @Override public Note get(String noteId) throws IOException { return getRepo(0).get(noteId); } @@ -95,8 +103,11 @@ public Note get(String noteId) throws IOException { Note get(int repoIndex, String noteId) throws IOException { return getRepo(repoIndex).get(noteId); } - - /* by default saves to all repos */ + + /** + * Saves to all repositories + */ + @Override public void save(Note note) throws IOException { getRepo(0).save(note); if (getRepoCount() > 1) { @@ -114,6 +125,7 @@ void save(int repoIndex, Note note) throws IOException { getRepo(repoIndex).save(note); } + @Override public void remove(String noteId) throws IOException { for (NotebookRepo repo : repos) { repo.remove(noteId); @@ -122,16 +134,16 @@ public void remove(String noteId) throws IOException { } /** - * copy new/updated notes from source to destination storage + * copy new/updated notes from source to destination storage * @throws IOException */ - public void sync(int sourceRepoIndex, int destRepoIndex) throws IOException { + void sync(int sourceRepoIndex, int destRepoIndex) throws IOException { LOG.info("Sync started"); NotebookRepo sourceRepo = getRepo(sourceRepoIndex); NotebookRepo destRepo = getRepo(destRepoIndex); List sourceNotes = sourceRepo.list(); List destNotes = destRepo.list(); - + Map> noteIDs = notesCheckDiff(sourceNotes, sourceRepo, destNotes, @@ -147,7 +159,7 @@ public void sync(int sourceRepoIndex, int destRepoIndex) throws IOException { } else { LOG.info("Nothing to push"); } - + if (!pullNoteIDs.isEmpty()) { LOG.info("Notes with the following IDs will be pulled"); for (String id : pullNoteIDs) { @@ -157,16 +169,16 @@ public void sync(int sourceRepoIndex, int destRepoIndex) throws IOException { } else { LOG.info("Nothing to pull"); } - + LOG.info("Sync ended"); } public void sync() throws IOException { sync(0, 1); } - + private void pushNotes(List ids, NotebookRepo localRepo, - NotebookRepo remoteRepo) throws IOException { + NotebookRepo remoteRepo) throws IOException { for (String id : ids) { remoteRepo.save(localRepo.get(id)); } @@ -175,7 +187,7 @@ private void pushNotes(List ids, NotebookRepo localRepo, int getRepoCount() { return repos.size(); } - + int getMaxRepoNum() { return maxRepoNum; } @@ -186,14 +198,13 @@ private NotebookRepo getRepo(int repoIndex) throws IOException { } return repos.get(repoIndex); } - - private Map> notesCheckDiff(List sourceNotes, - NotebookRepo sourceRepo, - List destNotes, - NotebookRepo destRepo) throws IOException { + + private Map> notesCheckDiff(List sourceNotes, + NotebookRepo sourceRepo, List destNotes, NotebookRepo destRepo) + throws IOException { List pushIDs = new ArrayList(); List pullIDs = new ArrayList(); - + NoteInfo dnote; Date sdate, ddate; for (NoteInfo snote : sourceNotes) { @@ -218,7 +229,7 @@ private Map> notesCheckDiff(List sourceNotes, pushIDs.add(snote.getId()); } } - + for (NoteInfo note : destNotes) { dnote = containsID(sourceNotes, note.getId()); if (dnote == null) { @@ -226,14 +237,14 @@ private Map> notesCheckDiff(List sourceNotes, pullIDs.add(note.getId()); } } - + Map> map = new HashMap>(); map.put(pushKey, pushIDs); map.put(pullKey, pullIDs); return map; } - private NoteInfo containsID(List notes, String id) { + private NoteInfo containsID(List notes, String id) { for (NoteInfo note : notes) { if (note.getId().equals(id)) { return note; @@ -248,7 +259,7 @@ private NoteInfo containsID(List notes, String id) { private Date lastModificationDate(Note note) { Date latest = new Date(0L); Date tempCreated, tempStarted, tempFinished; - + for (Paragraph paragraph : note.getParagraphs()) { tempCreated = paragraph.getDateCreated(); tempStarted = paragraph.getDateStarted(); @@ -266,7 +277,8 @@ private Date lastModificationDate(Note note) { } return latest; } - + + @SuppressWarnings("unused") private void printParagraphs(Note note) { LOG.info("Note name : " + note.getName()); LOG.info("Note ID : " + note.id()); @@ -274,7 +286,7 @@ private void printParagraphs(Note note) { printParagraph(p); } } - + private void printParagraph(Paragraph paragraph) { LOG.info("Date created : " + paragraph.getDateCreated()); LOG.info("Date started : " + paragraph.getDateStarted()); @@ -282,7 +294,8 @@ private void printParagraph(Paragraph paragraph) { LOG.info("Paragraph ID : " + paragraph.getId()); LOG.info("Paragraph title : " + paragraph.getTitle()); } - + + @SuppressWarnings("unused") private void printNoteInfos(List notes) { LOG.info("The following is a list of note infos"); for (NoteInfo note : notes) { @@ -295,9 +308,17 @@ private void printNoteInfo(NoteInfo note) { LOG.info("ID : " + note.getId()); Map configs = note.getConfig(); for (Map.Entry entry : configs.entrySet()) { - LOG.info("Config Key = " + entry.getKey() + " , Value = " + + LOG.info("Config Key = " + entry.getKey() + " , Value = " + entry.getValue().toString() + "of class " + entry.getClass()); } } + @Override + public void close() { + LOG.info("Closing all notebook storages"); + for (NotebookRepo repo: repos) { + repo.close(); + } + } + } diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoVersioned.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoVersioned.java new file mode 100644 index 00000000000..4615afd900d --- /dev/null +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/NotebookRepoVersioned.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.notebook.repo; + +import java.io.IOException; +import java.util.List; + +import org.apache.zeppelin.notebook.Note; + +/** + * Notebook repository w/ versions + */ +public interface NotebookRepoVersioned extends NotebookRepo { + + /** + * Get particular revision of the Notebooks + * + * @param noteId Id of the Notebook + * @param rev revision of the Notebook + * @return a Notebook + * @throws IOException + */ + public Note get(String noteId, String rev) throws IOException; + + /** + * List of revisions of the given Notebook + * + * @param noteId id of the Notebook + * @return list of revisions + */ + public List history(String noteId); + + /** + * Represents the 'Revision' a point in life of the notebook + */ + static class Rev { + public Rev(String name, int time) { + this.name = name; + this.time = time; + } + String name; + int time; + } + +} diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java index bb9e5d1571d..870aa8635d7 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java @@ -38,7 +38,6 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; -import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; @@ -52,13 +51,10 @@ import com.google.gson.GsonBuilder; /** - * - * @author vgmartinez - * + * Backend for storing Notebooks on S3 */ public class S3NotebookRepo implements NotebookRepo { - - Logger logger = LoggerFactory.getLogger(S3NotebookRepo.class); + private static final Logger LOG = LoggerFactory.getLogger(S3NotebookRepo.class); // Use a credential provider chain so that instance profiles can be utilized // on an EC2 instance. The order of locations where credentials are searched @@ -75,13 +71,11 @@ public class S3NotebookRepo implements NotebookRepo { // shared by all AWS SDKs and the AWS CLI // 4. Instance profile credentials delivered through the Amazon EC2 metadata service private AmazonS3 s3client = new AmazonS3Client(new DefaultAWSCredentialsProviderChain()); - private static String bucketName = ""; private String user = ""; - - + private ZeppelinConfiguration conf; - + public S3NotebookRepo(ZeppelinConfiguration conf) throws IOException { this.conf = conf; user = conf.getUser(); @@ -96,11 +90,11 @@ public List list() throws IOException { ListObjectsRequest listObjectsRequest = new ListObjectsRequest() .withBucketName(bucketName) .withPrefix(user + "/" + "notebook"); - ObjectListing objectListing; + ObjectListing objectListing; do { objectListing = s3client.listObjects(listObjectsRequest); - - for (S3ObjectSummary objectSummary : + + for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { if (objectSummary.getKey().contains("note.json")) { try { @@ -109,22 +103,21 @@ public List list() throws IOException { infos.add(info); } } catch (IOException e) { - logger.error("Can't read note ", e); + LOG.error("Can't read note ", e); } } } - listObjectsRequest.setMarker(objectListing.getNextMarker()); } while (objectListing.isTruncated()); } catch (AmazonServiceException ase) { - + } catch (AmazonClientException ace) { - logger.info("Caught an AmazonClientException, " + + LOG.info("Caught an AmazonClientException, " + "which means the client encountered " + "an internal error while trying to communicate" + " with S3, " + "such as not being able to access the network."); - logger.info("Error Message: " + ace.getMessage()); + LOG.info("Error Message: " + ace.getMessage()); } return infos; } @@ -133,10 +126,10 @@ private Note getNote(String key) throws IOException { GsonBuilder gsonBuilder = new GsonBuilder(); gsonBuilder.setPrettyPrinting(); Gson gson = gsonBuilder.create(); - + S3Object s3object = s3client.getObject(new GetObjectRequest( bucketName, key)); - + InputStream ins = s3object.getObjectContent(); String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING)); ins.close(); @@ -167,20 +160,18 @@ public void save(Note note) throws IOException { Gson gson = gsonBuilder.create(); String json = gson.toJson(note); String key = user + "/" + "notebook" + "/" + note.id() + "/" + "note.json"; - + File file = File.createTempFile("note", "json"); file.deleteOnExit(); Writer writer = new OutputStreamWriter(new FileOutputStream(file)); - + writer.write(json); writer.close(); - s3client.putObject(new PutObjectRequest( - bucketName, key, file)); + s3client.putObject(new PutObjectRequest(bucketName, key, file)); } - + @Override public void remove(String noteId) throws IOException { - String key = user + "/" + "notebook" + "/" + noteId; final ListObjectsRequest listObjectsRequest = new ListObjectsRequest() .withBucketName(bucketName).withPrefix(key); @@ -193,4 +184,9 @@ public void remove(String noteId) throws IOException { objects = s3client.listNextBatchOfObjects(objects); } while (objects.isTruncated()); } + + @Override + public void close() { + //no-op + } } diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java index 58f36f298c4..c8a492cac68 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java @@ -54,7 +54,6 @@ public class VFSNotebookRepo implements NotebookRepo { private FileSystemManager fsManager; private URI filesystemRoot; - private ZeppelinConfiguration conf; public VFSNotebookRepo(ZeppelinConfiguration conf) throws IOException { @@ -182,7 +181,7 @@ public Note get(String noteId) throws IOException { return getNote(noteDir); } - private FileObject getRootDir() throws IOException { + protected FileObject getRootDir() throws IOException { FileObject rootDir = fsManager.resolveFile(getPath("/")); if (!rootDir.exists()) { @@ -239,4 +238,10 @@ public void remove(String noteId) throws IOException { noteDir.delete(Selectors.SELECT_SELF_AND_CHILDREN); } + + @Override + public void close() { + //no-op + } + } diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GitNotebookRepoTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GitNotebookRepoTest.java new file mode 100644 index 00000000000..b92c7a97f64 --- /dev/null +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GitNotebookRepoTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.notebook.repo; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.zeppelin.conf.ZeppelinConfiguration; +import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars; +import org.apache.zeppelin.interpreter.mock.MockInterpreter1; +import org.apache.zeppelin.interpreter.mock.MockInterpreter2; +import org.apache.zeppelin.notebook.repo.NotebookRepoVersioned.Rev; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.diff.DiffEntry; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.base.Joiner; + +public class GitNotebookRepoTest { + + private static final String TEST_NOTE_ID = "2A94M5J1Z"; + + private File zeppelinDir; + private String notebooksDir; + private ZeppelinConfiguration conf; + private GitNotebookRepo notebookRepo; + + @Before + public void setUp() throws Exception { + String zpath = System.getProperty("java.io.tmpdir")+"/ZeppelinTest_"+System.currentTimeMillis(); + zeppelinDir = new File(zpath); + zeppelinDir.mkdirs(); + new File(zeppelinDir, "conf").mkdirs(); + + notebooksDir = Joiner.on(File.separator).join(zpath, "notebook"); + File notebookDir = new File(notebooksDir); + notebookDir.mkdirs(); + + String testNoteDir = Joiner.on(File.separator).join(notebooksDir, TEST_NOTE_ID); + FileUtils.copyDirectory(new File(Joiner.on(File.separator).join("src", "test", "resources", TEST_NOTE_ID)), + new File(testNoteDir) + ); + + System.setProperty(ConfVars.ZEPPELIN_HOME.getVarName(), zeppelinDir.getAbsolutePath()); + System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_DIR.getVarName(), notebookDir.getAbsolutePath()); + System.setProperty(ConfVars.ZEPPELIN_INTERPRETERS.getVarName(), "org.apache.zeppelin.interpreter.mock.MockInterpreter1,org.apache.zeppelin.interpreter.mock.MockInterpreter2"); + System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_STORAGE.getVarName(), "org.apache.zeppelin.notebook.repo.GitNotebookRepo"); + + MockInterpreter1.register("mock1", "org.apache.zeppelin.interpreter.mock.MockInterpreter1"); + MockInterpreter2.register("mock2", "org.apache.zeppelin.interpreter.mock.MockInterpreter2"); + + conf = ZeppelinConfiguration.create(); + } + + @After + public void tearDown() throws Exception { + NotebookRepoSyncTest.delete(zeppelinDir); + } + + @Test + public void initNonemptyNotebookDir() throws IOException, GitAPIException { + //given - .git does not exit + File dotGit = new File(Joiner.on(File.separator).join(notebooksDir, ".git")); + assertThat(dotGit.exists()).isEqualTo(false); + + //when + notebookRepo = new GitNotebookRepo(conf); + + //then + Git git = notebookRepo.getGit(); + assertThat(git).isNotNull(); + + assertThat(dotGit.exists()).isEqualTo(true); + assertThat(notebookRepo.list()).isNotEmpty(); + + List diff = git.diff().call(); + assertThat(diff).isEmpty(); + } + + @Test + public void showNotebookHistory() throws GitAPIException, IOException { + //given + notebookRepo = new GitNotebookRepo(conf); + assertThat(notebookRepo.list()).isNotEmpty(); + + //when + List testNotebookHistory = notebookRepo.history(TEST_NOTE_ID); + + //then + assertThat(testNotebookHistory).isNotEmpty(); + } + +} diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java index 8d33d51b0d1..64d9b32444d 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java @@ -46,7 +46,7 @@ import org.slf4j.LoggerFactory; -public class NotebookRepoSyncTest implements JobListenerFactory{ +public class NotebookRepoSyncTest implements JobListenerFactory { private File mainZepDir; private ZeppelinConfiguration conf; @@ -215,7 +215,7 @@ public void testSyncOnList() throws IOException { assertEquals(1, notebookRepoSync.list(1).size()); } - private void delete(File file){ + static void delete(File file){ if(file.isFile()) file.delete(); else if(file.isDirectory()){ File [] files = file.listFiles(); diff --git a/zeppelin-zengine/src/test/resources/log4j.properties b/zeppelin-zengine/src/test/resources/log4j.properties new file mode 100644 index 00000000000..001a222535d --- /dev/null +++ b/zeppelin-zengine/src/test/resources/log4j.properties @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c:%L - %m%n +#log4j.appender.stdout.layout.ConversionPattern= +#%5p [%t] (%F:%L) - %m%n +#%-4r [%t] %-5p %c %x - %m%n +# + +# Root logger option +log4j.rootLogger=INFO, stdout + +log4j.logger.org.apache.zeppelin.notebook.repo=DEBUG + +#mute some noisy guys +log4j.logger.org.apache.hadoop.mapred=WARN +log4j.logger.org.apache.hadoop.hive.ql=WARN +log4j.logger.org.apache.hadoop.hive.metastore=WARN +log4j.logger.org.apache.haadoop.hive.service.HiveServer=WARN +log4j.logger.org.apache.zeppelin.scheduler=WARN + +log4j.logger.org.quartz=WARN +log4j.logger.DataNucleus=WARN +log4j.logger.DataNucleus.MetaData=ERROR +log4j.logger.DataNucleus.Datastore=ERROR + +# Log all JDBC parameters +log4j.logger.org.hibernate.type=ALL + +