Update from private repo

access-ci-org · Jul 15, 2024 · bfea27d · bfea27d
1 parent 14b1cf6
commit bfea27d
Show file tree

Hide file tree

Showing 1,897 changed files with 92,123 additions and 0 deletions.
diff --git a/app/data/JSON/IGNORE - FIX/all-architectures.json b/app/data/JSON/IGNORE - FIX/all-architectures.json
@@ -0,0 +1,31 @@
+error
+{
+    "tools": [
+        {
+            "software_name": "AutoCAD",
+            "comprehensive_overview": "AutoCAD is a computer-aided design (CAD) software used for creating 2D and 3D drawings and models.",
+            "core_features": "Features include drawing and annotation tools, customizable UI, 3D modeling and visualization, documentation tools, and collaboration capabilities.",
+            "general_tags": ["CAD software", "Design software"],
+            "additional_tags": {
+                "research_discipline": [],
+                "research_area": [],
+                "software_class": "Architectural design software",
+                "software_type": "Commercial",
+                "field_of_science": "Engineering and Technology"
+            }
+        },
+        {
+            "software_name": "Revit",
+            "comprehensive_overview": "Revit is a building information modeling (BIM) software used by architects, engineers, and construction professionals for designing and documenting building projects.",
+            "core_features": "Key features include parametric modeling, collaborative design, interoperability, and construction documentation.",
+            "general_tags": ["BIM software", "Architectural design software"],
+            "additional_tags": {
+                "research_discipline": [],
+                "research_area": [],
+                "software_class": "Architectural design software",
+                "software_type": "Commercial",
+                "field_of_science": "Engineering and Technology"
+            }
+        }
+    ]
+}
diff --git a/app/data/JSON/IGNORE - FIX/allocations.json b/app/data/JSON/IGNORE - FIX/allocations.json
@@ -0,0 +1,22 @@
+error
+{
+    "Allocations": {
+        "overview": "Allocations is a command-line utility for managing memory allocations in C and C++ programs. It helps in tracking memory usage, detecting memory leaks, and optimizing memory allocation practices.",
+        "core_features": [
+            "Memory tracking",
+            "Memory leak detection",
+            "Memory optimization",
+            "Command-line interface"
+        ],
+        "general_tags": [
+            "Memory Allocation",
+            "Debugging Tool",
+            "C/C++"
+        ],
+        "research_discipline": [],
+        "research_area": [],
+        "software_class": "Development Tool",
+        "software_type": "Command Line Tool",
+        "field_of_science": "Computer and Information Sciences"
+    }
+}
diff --git a/app/data/JSON/IGNORE - FIX/cimfomfa.json b/app/data/JSON/IGNORE - FIX/cimfomfa.json
@@ -0,0 +1,11 @@
+{
+    "Software": "cimfomfa",
+    "AI Description": "",
+    "Core Features": "",
+    "General Tags": "",
+    "Software Type": "",
+    "Software Class": "",
+    "Research Area": "",
+    "Research Discipline": "",
+    "Research Field": ""
+}
diff --git a/app/data/JSON/IGNORE - FIX/gnu9.json b/app/data/JSON/IGNORE - FIX/gnu9.json
@@ -0,0 +1,32 @@
+{
+  "software_name": "GNU",
+  "comprehensive_overview": "GNU is an extensive collection of free software, which includes an operating system kernel, libraries, compilers, text editors, and numerous tools for software development, system administration, and more. GNU is based on the idea of providing users with the freedom to run, study, redistribute, and modify the software.",
+  "core_features": "GNU provides a wide range of tools and software components, such as the GNU Compiler Collection (GCC), GNU Emacs text editor, GNU Bash shell, GNU Core Utilities, GNU Debugger (GDB), GNU Make, and more. These tools are widely used in the development and maintenance of software applications, as well as in system administration tasks.",
+  "general_tags": ["software", "development", "operating system"],
+  "additional_tags": {
+    "research_discipline": [],
+    "research_area": [],
+    "software_class": "Development Tools",
+    "software_type": "Software Development Tools",
+    "field_of_science": {
+      "Natural Sciences": [],
+      "Mathematics": [],
+      "Computer and Information Sciences": ["Computer Science"],
+      "Physical Sciences": [],
+      "Chemical Sciences": [],
+      "Earth and Environmental Sciences": [],
+      "Biological Sciences": [],
+      "Engineering and Technology": [],
+      "Medical and Health Sciences": [],
+      "Agricultural Sciences": [],
+      "Social Sciences": [],
+      "Humanities": [],
+      "Training": [],
+      "Staff Activities (ACCESS or SP)": [],
+      "Performance Evaluation and Benchmarking": [],
+      "Infrastructure and Instrumentation": [],
+      "Science and Engineering Education": [],
+      "Other": []
+    }
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cpukeras.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cpukeras.json
@@ -0,0 +1,26 @@
+redundant
+{
+  "software_name": "ml-toolkit-cpu/keras",
+  "comprehensive_overview": "Keras is a high-level neural networks API, written in Python, that is capable of running on top of TensorFlow, CNTK, or Theano. It is designed for enabling fast experimentation with deep neural networks and focuses on being user-friendly, modular, and extensible.",
+  "core_features": [
+    "High-level neural networks API",
+    "Integration with TensorFlow, CNTK, and Theano",
+    "User-friendly and modular design",
+    "Support for both CPU and GPU computations",
+    "Easy and fast prototyping of deep learning models"
+  ],
+  "general_tags": [
+    "Machine Learning",
+    "Deep Learning",
+    "Neural Networks",
+    "Python"
+  ],
+  "additional_tags": {
+    "research_discipline": "",
+    "research_area": "",
+    "software_class": "Deep Learning Library",
+    "software_type": "Library",
+    "field_of_science": "Computer and Information Sciences",
+    "specific_field": "Artificial Intelligence and Intelligent Systems"
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cpunltk.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cpunltk.json
@@ -0,0 +1,14 @@
+redundant
+{
+  "software_name": "ml-toolkit-cpu",
+  "comprehensive_overview": "The ml-toolkit-cpu is a machine learning toolkit that runs on central processing units (CPUs). It provides a collection of algorithms and tools for tasks such as classification, regression, clustering, and more.",
+  "core_features": "1. Support for various machine learning algorithms. 2. Easy integration with CPU-based systems. 3. Scalability for handling large datasets. 4. Performance optimization for CPU execution.",
+  "general_tags": ["machine learning", "CPU-based", "toolkit"],
+  "additional_tags": {
+    "research_discipline": "Computer and Information Sciences",
+    "research_area": "Artificial Intelligence and Intelligent Systems",
+    "software_class": "Machine Learning",
+    "software_type": "Toolkit",
+    "field_of_science": "Computer and Information Sciences"
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cpuonnx.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cpuonnx.json
@@ -0,0 +1,25 @@
+redundant
+{
+  "software_name": "ML Toolkit CPU",
+  "comprehensive_overview": "ML Toolkit CPU is a set of tools for machine learning and deep learning tasks that support the ONNX format. It provides functionalities for model inference, optimization, and deployment on CPU-based systems.",
+  "core_features": [
+    "Model inference on CPU",
+    "Optimization of ONNX models",
+    "Deployment of ONNX models on CPU-based systems"
+  ],
+  "general_tags": [
+    "machine learning",
+    "deep learning",
+    "model inference",
+    "ONNX",
+    "deployment",
+    "optimization"
+  ],
+  "additional_tags": {
+    "research_discipline": "",
+    "research_area": "",
+    "software_class": "Machine Learning",
+    "software_type": "Toolkit",
+    "field_of_science": "Computer and Information Sciences"
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cpuopencv.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cpuopencv.json
@@ -0,0 +1,14 @@
+redundant
+{
+    "software_name": "ml-toolkit-cpu",
+    "comprehensive_overview": "ml-toolkit-cpu is a software library that provides a set of tools and utilities for machine learning tasks, specifically designed to run on CPUs. It is optimized for performance and ease of use in implementing machine learning algorithms.",
+    "core_features": "1. Support for various machine learning algorithms. 2. Data preprocessing and feature engineering capabilities. 3. Model evaluation and performance metrics. 4. Integration with popular frameworks like OpenCV. 5. Scalable and efficient processing on CPU architectures.",
+    "general_tags": ["Machine Learning", "CPU", "Toolkit"],
+    "additional_tags": {
+        "research_discipline": "",
+        "research_area": "",
+        "software_class": "Library",
+        "software_type": "",
+        "field_of_science": "Computer and Information Sciences"
+    }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cpupytorch.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cpupytorch.json
@@ -0,0 +1,27 @@
+redundant
+{
+  "software_name": "ML Toolkit (CPU) - PyTorch",
+  "comprehensive_overview": "PyTorch is an open-source machine learning library based on the Torch library, primarily developed by Facebook's AI Research lab (FAIR). It is widely used for applications such as natural language processing, computer vision, and deep learning. PyTorch provides a flexible and dynamic computational graph, allowing for easy experimentation and efficient production deployments.",
+  "core_features": [
+    "Dynamic computational graph",
+    "Support for GPU acceleration",
+    "Large collection of neural network layers and activation functions",
+    "Built-in automatic differentiation",
+    "Easy integration with NumPy",
+    "Simple and intuitive API"
+  ],
+  "general_tags": [
+    "Machine Learning",
+    "Deep Learning",
+    "Neural Networks",
+    "Artificial Intelligence",
+    "Computational Graph"
+  ],
+  "additional_tags": {
+    "research_discipline": "",
+    "research_area": "",
+    "software_class": "Machine Learning Library",
+    "software_type": "Framework",
+    "field_of_science": "Computer and Information Sciences"
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/ml-toolkit-cputensorflow.json b/app/data/JSON/IGNORE - FIX/ml-toolkit-cputensorflow.json
@@ -0,0 +1,26 @@
+redundant
+{
+  "software_name": "TensorFlow",
+  "overview": "TensorFlow is an open-source deep learning framework developed by Google. It provides a comprehensive ecosystem of tools, libraries, and community resources for machine learning, including neural networks, natural language processing, and image recognition.",
+  "core_features": [
+    "Flexibility to deploy computation to one or more CPUs or GPUs in a desktop, server, or mobile device",
+    "Scalability to train and deploy machine learning models in production environments",
+    "Rich collection of tools and libraries for simplifying machine learning workflows",
+    "Support for large-scale distributed training and inference"
+  ],
+  "general_tags": [
+    "machine learning",
+    "deep learning",
+    "neural networks",
+    "natural language processing",
+    "image recognition",
+    "open-source"
+  ],
+  "additional_tags": {
+    "research_discipline": "Artificial Intelligence and Intelligent Systems",
+    "research_area": "Machine Learning",
+    "software_class": "Deep Learning Framework",
+    "software_type": "Library",
+    "field_of_science": "Computer and Information Sciences"
+  }
+}
diff --git a/app/data/JSON/IGNORE - FIX/must.json b/app/data/JSON/IGNORE - FIX/must.json
@@ -0,0 +1,60 @@
+error
+{
+	"tools": [
+		{
+			"software_name": "Abinit",
+			"comprehensive_overview": "Abinit is an open-source software package that allows users to perform first-principles calculations of the electronic structure of materials.",
+			"core_features": "Abinit offers a range of functionalities including density functional theory calculations, pseudopotential and all-electron implementations, structural relaxations, molecular dynamics, spectral properties, and more.",
+			"general_tags": ["computational software", "electronic structure calculations", "materials science"],
+			"research_discipline": "Condensed Matter Physics",
+			"research_area": "Electronic Structure of Materials",
+			"software_class": "First-principles calculation software",
+			"software_type": "Density functional theory",
+			"field_of_science": "Physical Sciences"
+		},
+		{
+			"software_name": "BLAST",
+			"comprehensive_overview": "BLAST (Basic Local Alignment Search Tool) is a widely used bioinformatics tool for comparing primary biological sequence information.",
+			"core_features": "BLAST can be used to search nucleotide or protein databases to find sequences that resemble the query sequence.",
+			"general_tags": ["bioinformatics", "sequence analysis", "sequence alignment"],
+			"research_discipline": "Genetics",
+			"research_area": "Sequence Analysis",
+			"software_class": "Sequence alignment software",
+			"software_type": "Bioinformatics tool",
+			"field_of_science": "Biological Sciences"
+		},
+		{
+			"software_name": "GROMACS",
+			"comprehensive_overview": "GROMACS is a versatile package used for molecular dynamics simulations, quantum chemistry calculations, and more.",
+			"core_features": "GROMACS provides high-performance and parallelized algorithms for simulating molecular dynamics of large biomolecular systems.",
+			"general_tags": ["computational chemistry", "molecular dynamics", "biomolecular simulations"],
+			"research_discipline": "Biophysics",
+			"research_area": "Molecular Dynamics Simulations",
+			"software_class": "Molecular dynamics simulation software",
+			"software_type": "Quantum chemistry",
+			"field_of_science": "Biological Sciences"
+		},
+		{
+			"software_name": "MATLAB",
+			"comprehensive_overview": "MATLAB is a high-level programming language and interactive environment for numerical computation, visualization, and programming.",
+			"core_features": "MATLAB provides a wide range of mathematical functions for linear algebra, statistics, optimization, signal processing, image processing, and more.",
+			"general_tags": ["computational software", "numerical computation", "data visualization"],
+			"research_discipline": "Applied Mathematics",
+			"research_area": "Numerical Computation",
+			"software_class": "Numerical computing environment",
+			"software_type": "Programming language",
+			"field_of_science": "Mathematics"
+		},
+		{
+			"software_name": "TensorFlow",
+			"comprehensive_overview": "TensorFlow is an open-source neural network library, developed by Google, capable of performing machine learning and deep learning tasks.",
+			"core_features": "TensorFlow supports building and training deep neural networks for tasks like image recognition, natural language processing, and more.",
+			"general_tags": ["machine learning", "deep learning", "neural networks"],
+			"research_discipline": "Artificial Intelligence and Intelligent Systems",
+			"research_area": "Machine Learning",
+			"software_class": "Deep learning library",
+			"software_type": "Neural network framework",
+			"field_of_science": "Computer and Information Sciences"
+		}
+	]
+}
diff --git a/app/data/JSON/IGNORE - FIX/pals.json b/app/data/JSON/IGNORE - FIX/pals.json
@@ -0,0 +1,4 @@
+error
+{
+    "error": "Tool information not found. Please provide a valid tool name from the list of software, compilers, python libraries, computational software, conda packages, bioinformatics, and HPC tools."
+}
diff --git a/app/data/JSON/IGNORE - FIX/survivor.json b/app/data/JSON/IGNORE - FIX/survivor.json
@@ -0,0 +1,20 @@
+{
+    "Software": "survivor",
+    "AI Description": "Survivor is a reality competition television show where contestants are placed in remote locations and must outwit, outplay, and outlast their competitors to win a grand prize.",
+    "Core Features": "Contestants compete in physical and mental challenges, form alliances, strategize voting tactics, and participate in tribal councils where one contestant is voted out each time until one remains to claim the prize.",
+    "General Tags": [
+        "Television Show",
+        "Reality Competition",
+        "Survival",
+        "Strategy"
+    ],
+    "Software Type": "Television Show",
+    "Software Class": "Entertainment",
+    "Research Field": "Social Sciences",
+    "Research Area": [
+        "Tv Show Production"
+    ],
+    "Research Discipline": [
+        "Media And Communications"
+    ]
+}
diff --git a/app/data/exampleUse/7z.txt b/app/data/exampleUse/7z.txt
@@ -0,0 +1,19 @@
+Use Case: Use 7z to compress or decompress files using the 7z format.
+
+Code Details and Examples:
+
+Code:
+
+Compress files:
+
+7z a test.7z file1.txt file2.txt file3.txt
+
+This command compresses the files "file1.txt", "file2.txt", and "file3.txt" into the archive "test.7z".
+
+Decompress a 7z file:
+
+7z e test.7z
+
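+This command extracts all files from the archive "test.7z" into the current directory, without recreating the folder structure stored in the archive (use "7z x test.7z" instead to keep full paths).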
+
+7z accepts any file or directory on your system as input; the input files need no special format. When compressing, specify the name of the output 7z archive followed by the files you want to add. When extracting, you only need to specify the name of the 7z archive to extract.
diff --git a/app/data/exampleUse/ABySS.txt b/app/data/exampleUse/ABySS.txt
@@ -0,0 +1,37 @@
+Use Case: ABySS (Assembly By Short Sequences) is a de novo, parallel, paired-end sequence assembler that is designed for large genomes. It utilizes the de Bruijn graph data structure to construct contigs, following which scaffolds are assembled using paired-end and mate-paired data.
+
+Code details and examples: Code
+
+**Input file format**: FASTQ and FASTA.
+
+The input files contain the DNA sequencing reads. A FASTQ input file looks like the following example:
+
+ ```
+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+@SEQ_ID
+GTTTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+ ```
+
+For ABySS, to assemble a genome from paired-end reads, separate your reads into two files according to whether they belong to the left end or the right end of the DNA fragments. 
+
+**Command to run code snippet**:
+```
+abyss-pe k=64 name=genome in='reads1.fastq reads2.fastq'
+```
+The `k` parameter sets the size of the k-mer used for assembly. The `name` parameter sets the prefix used to name the output files of this assembly. The `in` parameter lists the input files, here the two files that hold the paired-end reads.
+
+**Output file format**:
+The assembly is written in FASTA format (for example `genome-contigs.fa`, named after the `name` parameter). Each contig's header line carries the contig's identifier together with summary statistics such as its length and coverage. This file can be further analyzed using other bioinformatics software tools.
+
+For example:
+```
+>1 length=586 numreads=2 gene=isogroup00001 status=it_thresh
+GCCACATACGTCAGGTTACGACCAGTGGATCGAAAGGTTTGTCATGTTTGTCACCAGTGG
+TTAGGGTTAGGATGTGAGTGACAACAGGTTGGTCACATACGTCAGGGTTACGACCAGTGG
+CATCGAAAGGTTTGTCATGTTTGTCACCAGTGGTTAGGGTTAGGATGTGAGTGA
+```