"""
Module that extracts attachments from notebooks into their own files
"""

# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

import os
|
|
from base64 import b64decode
|
|
|
|
from traitlets import Bool, Unicode
|
|
|
|
from .base import Preprocessor
|
|
|
|
|
|
class ExtractAttachmentsPreprocessor(Preprocessor):
    """
    Extracts attachments from all (markdown and raw) cells in a notebook.

    By default (``use_separate_dir`` is False) the extracted attachments are
    stored in ``resources["outputs"]`` with paths under
    ``resources["output_files_dir"]``.  When ``use_separate_dir`` is True they
    are stored in ``resources["attachments"]`` with paths under the directory
    named by ``attachments_directory_template``.

    https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments
    """

    attachments_directory_template = Unicode(
        "{notebook_name}_attachments",
        help="Directory to place attachments if use_separate_dir is True",
    ).tag(config=True)

    use_separate_dir = Bool(
        False,
        help="Whether to use output_files_dir (which ExtractOutput also uses) or "
        "create a separate directory for attachments",
    ).tag(config=True)

    def __init__(self, **kw):
        """
        Public constructor

        Keyword arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(**kw)
        # Directory path that is prepended to every attachment filename.
        # It is set in self.preprocess because it needs resources.
        self.path_name = ""
        # Key in resources under which extracted attachments are stored.
        # Set here as a default, in case someone doesn't want to call preprocess.
        self.resources_item_key = "attachments"

    # Add condition and configurability here
    def preprocess(self, nb, resources):
        """
        Determine some settings and apply preprocessor to notebook

        Sets ``self.path_name`` and ``self.resources_item_key`` from
        ``use_separate_dir``, makes sure the target entry in ``resources`` is
        a dict, then delegates to the parent class's ``preprocess``.

        Parameters
        ----------
        nb
            Notebook being converted; passed through to the parent class.
        resources
            Mapping of conversion resources.  ``resources["unique_key"]`` is
            read when ``use_separate_dir`` is True, otherwise
            ``resources["output_files_dir"]`` is read.

        Returns
        -------
        tuple
            ``(nb, resources)`` as returned by the parent class.
        """
        if self.use_separate_dir:
            self.path_name = self.attachments_directory_template.format(
                notebook_name=resources["unique_key"]
            )
            # Initialize resources for attachments
            resources["attachment_files_dir"] = self.path_name
            resources["attachments"] = {}
            self.resources_item_key = "attachments"
        else:
            # Use same resources as ExtractOutput
            self.path_name = resources["output_files_dir"]
            self.resources_item_key = "outputs"

        # Make sure key exists.  ``.get`` is used so that a missing key is
        # created here instead of raising KeyError on a plain dict.
        if not isinstance(resources.get(self.resources_item_key), dict):
            resources[self.resources_item_key] = {}

        nb, resources = super().preprocess(nb, resources)
        return nb, resources

    def preprocess_cell(self, cell, resources, index):
        """
        Extract attachments to individual files and
        change references to them.

        E.g.
        '![image.png](attachment:image.png)'
        becomes
        '![image.png]({path_name}/image.png)'

        Assumes self.path_name and self.resources_item_key is set properly
        (usually in preprocess).

        Parameters
        ----------
        cell
            Notebook cell; only cells containing an "attachments" entry are
            modified.
        resources
            Mapping of conversion resources; decoded attachment bytes are
            added to ``resources[self.resources_item_key]``.
        index
            Index of the cell (unused here).

        Returns
        -------
        tuple
            ``(cell, resources)``
        """
        if "attachments" not in cell:
            return cell, resources

        for fname in cell.attachments:
            self.log.debug("Encountered attachment %s", fname)

            # Add file for writer

            # Right now I don't know of a situation where there would be multiple
            # mime types under same filename, and I can't index into it without
            # the mimetype.  So I only read the first one.
            mime_bundle = cell.attachments[fname]
            first_mimetype = next(iter(mime_bundle), None)
            if first_mimetype is None:
                # An empty bundle carries no data.  Skip it so that neither an
                # unbound variable nor a previous attachment's bytes are
                # written under this filename.
                self.log.warning("Attachment %s has no data, skipping", fname)
                continue

            # convert to bytes and decode
            decoded = b64decode(mime_bundle[first_mimetype].encode("utf-8"))

            # FilesWriter wants path to be in attachment filename here
            # NOTE(review): fname comes from the notebook and is joined
            # without sanitisation -- confirm that absolute names or ".."
            # components are handled by whatever writes these files.
            new_filename = os.path.join(self.path_name, fname)
            resources[self.resources_item_key][new_filename] = decoded

            # Edit the reference to the attachment

            # os.path.join on windows uses "\\" separator,
            # but files like markdown still want "/"
            if os.path.sep != "/":
                new_filename = new_filename.replace(os.path.sep, "/")
            cell.source = cell.source.replace("attachment:" + fname, new_filename)

        return cell, resources