loaders.py
Source: sunholo/chunker/loaders.py
Functions
convert_to_txt(file_path)
No docstring available.
convert_to_txt_and_extract(gs_file, split=False)
No docstring available.
ignore_files(filepath)
Returns True if the given path's file extension is found within the config.json "code_extensions" array; returns False otherwise.
read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None)
No docstring available.
read_gdrive_to_document(url: str, metadata: dict = None)
No docstring available.
read_git_repo(clone_url, branch='main', metadata=None)
No docstring available.
read_url_to_document(url: str, metadata: dict = None)
No docstring available.
Classes
MyGoogleDriveLoader
.. deprecated:: 0.0.32 Use langchain_google_community.GoogleDriveLoader instead.
Load Google Docs from Google Drive.
-
__eq__(self, other: Any) -> bool
- Return self==value.
-
__getstate__(self) -> 'DictAny'
- Helper for pickle.
-
__init__(self, url, *args, **kwargs)
- Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
-
__iter__(self) -> 'TupleGenerator'
- so `dict(model)` works
-
__json_encoder__(obj: Any) -> Any
- No docstring available.
-
__pretty__(self, fmt: Callable[[Any], Any], **kwargs: Any) -> Generator[Any, NoneType, NoneType]
- Used by devtools (https://python-devtools.helpmanual.io/) to provide human-readable representations of objects
-
__repr__(self) -> str
- Return repr(self).
-
__repr_args__(self) -> 'ReprArgs'
- Returns the attributes to show in `__str__`, `__repr__`, and `__pretty__`; this is generally overridden.
Can either return:
- name - value pairs, e.g.: `[('foo_name', 'foo'), ('bar_name', ['b', 'a', 'r'])]`
- or, just values, e.g.: `[(None, 'foo'), (None, ['b', 'a', 'r'])]`
-
__repr_name__(self) -> str
- Name of the instance's class, used in `__repr__`.
-
__repr_str__(self, join_str: str) -> str
- No docstring available.
-
__rich_repr__(self) -> 'RichReprResult'
- Get fields for Rich library
-
__setattr__(self, name, value)
- Implement setattr(self, name, value).
-
__setstate__(self, state: 'DictAny') -> None
- No docstring available.
-
__str__(self) -> str
- Return str(self).
-
_calculate_keys(self, include: Optional[ForwardRef('MappingIntStrAny')], exclude: Optional[ForwardRef('MappingIntStrAny')], exclude_unset: bool, update: Optional[ForwardRef('DictStrAny')] = None) -> Optional[AbstractSet[str]]
- No docstring available.
-
_copy_and_set_values(self: 'Model', values: 'DictStrAny', fields_set: 'SetStr', *, deep: bool) -> 'Model'
- No docstring available.
-
_extract_id(self, url)
- No docstring available.
-
_fetch_files_recursive(self, service: Any, folder_id: str) -> List[Dict[str, Union[str, List[str]]]]
- Fetch all files and subfolders recursively.
-
_init_private_attributes(self) -> None
- No docstring available.
-
_iter(self, to_dict: bool = False, by_alias: bool = False, include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False) -> 'TupleGenerator'
- No docstring available.
-
_load_credentials(self) -> Any
- Load credentials. The order of loading credentials:
- Service account key if file exists
- Token path (for OAuth Client) if file exists
- Credentials path (for OAuth Client) if file exists
- Default credentials. If no credentials are found, raise DefaultCredentialsError.
-
_load_document_from_id(self, id: str) -> langchain_core.documents.base.Document
- Load a document from an ID.
-
_load_documents_from_folder(self, folder_id: str, *, file_types: Optional[Sequence[str]] = None) -> List[langchain_core.documents.base.Document]
- Load documents from a folder.
-
_load_documents_from_ids(self) -> List[langchain_core.documents.base.Document]
- Load documents from a list of IDs.
-
_load_file_from_id(self, id: str) -> List[langchain_core.documents.base.Document]
- Load a file from an ID.
-
_load_file_from_ids(self) -> List[langchain_core.documents.base.Document]
- Load files from a list of IDs.
-
_load_sheet_from_id(self, id: str) -> List[langchain_core.documents.base.Document]
- Load a sheet and all tabs from an ID.
-
alazy_load(self) -> 'AsyncIterator[Document]'
- A lazy loader for Documents.
-
aload(self) -> 'List[Document]'
- Load data into Document objects.
-
copy(self: 'Model', *, include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, update: Optional[ForwardRef('DictStrAny')] = None, deep: bool = False) -> 'Model'
- Duplicate a model, optionally choose which fields to include, exclude and change.
:param include: fields to include in new model
:param exclude: fields to exclude from new model, as with values this takes precedence over include
:param update: values to change/add in the new model. Note: the data is not validated before creating
the new model: you should trust this data
:param deep: set to `True` to make a deep copy of the model
:return: new model instance
-
dict(self, *, include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, by_alias: bool = False, skip_defaults: Optional[bool] = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False) -> 'DictStrAny'
- Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
-
json(self, *, include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None, by_alias: bool = False, skip_defaults: Optional[bool] = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, encoder: Optional[Callable[[Any], Any]] = None, models_as_dict: bool = True, **dumps_kwargs: Any) -> str
- Generate a JSON representation of the model, `include` and `exclude` arguments as per `dict()`.
`encoder` is an optional function to supply as `default` to json.dumps(), other arguments as per `json.dumps()`.
-
lazy_load(self) -> 'Iterator[Document]'
- A lazy loader for Documents.
-
load(self) -> List[langchain_core.documents.base.Document]
- Load documents.
-
load_and_split(self, text_splitter: 'Optional[TextSplitter]' = None) -> 'List[Document]'
- Load Documents and split into chunks. Chunks are returned as Documents.
Do not override this method. It should be considered to be deprecated!
Args: text_splitter: TextSplitter instance to use for splitting documents. Defaults to RecursiveCharacterTextSplitter.
Returns: List of Documents.
-
load_from_url(self, url: str)
- No docstring available.