Skip to content

Commit

Permalink
✨ Source Microsoft OneDrive: Add fetching shared items (#35849)
Browse files Browse the repository at this point in the history
  • Loading branch information
tolik0 authored Mar 25, 2024
1 parent 4fae944 commit b52c58d
Show file tree
Hide file tree
Showing 9 changed files with 515 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
"name": "test_csv"
},
"stream_state": {
"_ab_source_file_last_modified": "2023-12-23T06:49:25.000000Z_Test_folder_2/TestFileOneDrive.csv",
"history": {
"Test_folder_2/TestFileOneDrive.csv": "2023-12-23T06:49:25.000000Z"
}
"https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/TestFile.csv": "2023-11-17T13:52:35.000000Z",
"Test_folder_2/TestFileOneDrive.csv": "2023-11-23T06:49:25.000000Z",
"https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/Test_folder_2/TestFileSharePoint.csv": "2023-12-15T17:34:08.000000Z",
"https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/simple_test.csv": "2024-01-16T12:45:20.000000Z"
},
"_ab_source_file_last_modified": "2024-01-16T12:45:20.000000Z_https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/simple_test.csv"
}
}
},
Expand All @@ -20,10 +23,11 @@
"name": "test_unstructured"
},
"stream_state": {
"_ab_source_file_last_modified": "2023-12-23T06:49:25.000000Z_simple_pdf_file.pdf",
"history": {
"simple_pdf_file.pdf": "2023-12-23T06:49:25.000000Z"
}
"simple_pdf_file.pdf": "2023-12-13T11:24:38.000000Z",
"https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/Test_foler_2_1/simple_pdf_file.pdf": "2023-12-15T16:47:21.000000Z"
},
"_ab_source_file_last_modified": "2023-12-15T16:47:21.000000Z_https://airbyte179.sharepoint.com/Shared%20Documents/Test_folder/Test_foler_2_1/simple_pdf_file.pdf"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
},
"primary_key": {
"title": "Primary Key",
"description": "The column or columns (for a composite key) that serves as the unique identifier of a record.",
"description": "The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.",
"airbyte_hidden": true,
"type": "string"
},
"days_to_sync_if_history_is_full": {
Expand Down Expand Up @@ -274,12 +275,46 @@
"const": "unstructured",
"type": "string"
},
"skip_unprocessable_file_types": {
"title": "Skip Unprocessable File Types",
"description": "If true, skip files that cannot be parsed because of their file type and log a warning. If false, fail the sync. Corrupted files with valid file types will still result in a failed sync.",
"skip_unprocessable_files": {
"title": "Skip Unprocessable Files",
"description": "If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync.",
"default": true,
"always_show": true,
"type": "boolean"
},
"strategy": {
"title": "Parsing Strategy",
"description": "The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf",
"default": "auto",
"always_show": true,
"order": 0,
"enum": ["auto", "fast", "ocr_only", "hi_res"],
"type": "string"
},
"processing": {
"title": "Processing",
"description": "Processing configuration",
"default": {
"mode": "local"
},
"type": "object",
"oneOf": [
{
"title": "Local",
"type": "object",
"properties": {
"mode": {
"title": "Mode",
"default": "local",
"const": "local",
"enum": ["local"],
"type": "string"
}
},
"description": "Process files locally, supporting `fast` and `ocr` modes. This is the default option.",
"required": ["mode"]
}
]
}
},
"description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.",
Expand Down Expand Up @@ -400,20 +435,39 @@
"order": 2,
"type": "string"
},
"search_scope": {
"title": "Search Scope",
"description": "Specifies the location(s) to search for files. Valid options are 'ACCESSIBLE_DRIVES' to search in the selected OneDrive drive, 'SHARED_ITEMS' for shared items the user has access to, and 'ALL' to search both.",
"default": "ALL",
"enum": ["ACCESSIBLE_DRIVES", "SHARED_ITEMS", "ALL"],
"order": 3,
"type": "string"
},
"folder_path": {
"title": "Folder Path",
"description": "Path to folder of the Microsoft OneDrive drive where the file(s) exist.",
"order": 3,
"description": "Path to a specific folder within the drives to search for files. Leave empty to search all folders of the drives. This does not apply to shared items.",
"default": ".",
"order": 4,
"type": "string"
}
},
"required": ["streams", "credentials", "folder_path"]
"required": ["streams", "credentials"]
},
"advanced_auth": {
"auth_flow_type": "oauth2.0",
"predicate_key": ["credentials", "auth_type"],
"predicate_value": "Client",
"oauth_config_specification": {
"oauth_user_input_from_connector_config_specification": {
"type": "object",
"additionalProperties": false,
"properties": {
"tenant_id": {
"type": "string",
"path_in_connector_config": ["credentials", "tenant_id"]
}
}
},
"complete_oauth_output_specification": {
"type": "object",
"additionalProperties": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 01d1c685-fd4a-4837-8f4c-93fe5a0d2188
dockerImageTag: 0.1.9
dockerImageTag: 0.2.0
dockerRepository: airbyte/source-microsoft-onedrive
githubIssueLabel: source-microsoft-onedrive
icon: microsoft-onedrive.svg
Expand Down
Loading

0 comments on commit b52c58d

Please sign in to comment.