forked from machine-learning-exchange/katalog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
codenet_langclass.yaml
64 lines (57 loc) · 2.34 KB
/
codenet_langclass.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Copyright 2021 The MLX Contributors
#
# SPDX-License-Identifier: Apache-2.0
id: codenet-langclass
name: CodeNet - Language Classification
description: A small subset of the Project CodeNet dataset, intended to showcase a language classification model. It only comprises 2198 random samples across 10 languages.
version: 1.0.0
created: 2021-05-05
updated: 2021-05-05
format:
- type: C++, Java, Python, other programming languages, csv, text
url: https://en.wikipedia.org/wiki/Programming_language
domain: Solutions to Programming Problems
# Information about the entity that makes the data set available
provider:
name: Data Asset eXchange
url: https://developer.ibm.com/exchanges/data/all/project-codenet/
# identifies where the data set is stored and how it is stored (REQUIRED)
repository:
type: HTTP
url: https://dax-cdn.cdn.appdomain.cloud/dax-project-codenet/1.0.0/Project_CodeNet_LangClass.tar.gz
mime_type: application/x-tar
sha_512:
size: 397K
# REQUIRED; data set license information
license:
commercial: false
name: CDLA Permissive v2.0
url: https://cdla.dev/permissive-2-0/
# REQUIRED; describes relevant files in the data set archive
content:
- pattern: train, test
description: The dataset comprises 2,198 code submissions across 10 programming languages.
records: 2198
size: 397K
type: file
format: various
mime_type: text/plain
# OPTIONAL; Identifies where the data set was obtained from
source:
name: Data Asset eXchange
url: https://developer.ibm.com/exchanges/data/all/project-codenet/
# OPTIONAL; but recommended
seo_tags:
- Solutions to Programming Problems
- Text
# OPTIONAL; assets that complement this data set, e.g. notebooks
related_assets:
- name: Project CodeNet Language Classification
description: Simple experiment that shows how to create and exercise a Keras model to detect the language of a piece of source code
mime_type: application/x-ipynb+json
url: https://github.com/machine-learning-exchange/katalog/blob/main/notebook-samples/src/codenet/Project_CodeNet_LangClass.ipynb
application:
name: MLX
asset_id: notebooks/project-codenet-language-classification
# OPTIONAL; url for the markdown file which describes the asset
readme_url: https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet_langclass/codenet_langclass.md