## Generate single-cell images [image: .md][image]

Here, we are going to process the previously ingested microscopy
images with the scPortrait pipeline to generate single-cell images
that we can use to assess autophagosome formation at a single-cell
level.

 import lamindb as ln
 from collections.abc import Iterable

 from pathlib import Path
 from scportrait.pipeline.extraction import HDF5CellExtraction
 from scportrait.pipeline.project import Project
 from scportrait.pipeline.segmentation.workflows import CytosolSegmentationCellpose

 ln.track()

# Query microscopy images

First, we query for the raw and annotated microscopy images.

 input_images = ln.Artifact.filter(
 ulabels__name="autophagy imaging", description__icontains="raw image", suffix=".tif"
 )

The experiment includes two genotypes ("WT" and "EI24KO") under two
treatment conditions ("unstimulated" vs. "14h Torin-1"). Multiple
clonal cell lines were imaged for each condition across several fields
of view (FOVs) and imaging channels.

We'll extract single-cell images from each FOV and annotate them with
metadata including genotype, treatment condition, clonal cell line,
and imaging experiment.

 input_images_df = input_images.to_dataframe(features=True)

 display(input_images_df.head())

 conditions = input_images_df["stimulation"].unique().tolist()
 cell_line_clones = input_images_df["cell_line_clone"].unique().tolist()
 FOVs = input_images_df["FOV"].unique().tolist()

Alternatively, we can query for the "ULabel" directly:

 conditions = ln.ULabel.filter(
 links_artifact__feature__name="stimulation", artifacts__in=input_images
 ).distinct()

 cell_line_clones = ln.ULabel.filter(
 links_artifact__feature__name="cell_line_clone", artifacts__in=input_images
 ).distinct()

 FOVs = ln.ULabel.filter(
 links_artifact__feature__name="FOV", artifacts__in=input_images
 ).distinct()

By iterating through conditions, cell lines and FOVs, we should only
have 3 images showing a single FOV to enable processing using
scPortrait.

 # Create artifact type feature and associated label
 ln.Feature(name="artifact type", dtype=ln.ULabel).save()
 ln.ULabel(name="scportrait config").save()

 # Load config file for processing all datasets
 config_file_af = ln.Artifact.connect("scportrait/examples").get(
 key="input_data_imaging_usecase/config.yml"
 )

 config_file_af.description = (
 "config for scportrait for processing of cells stained for autophagy markers"
 )
 config_file_af.save()

 # Annotate the config file with the metadata relevant to the study
 config_file_af.features.add_values(
 {"study": "autophagy imaging", "artifact type": "scportrait config"}
 )

# Process images with scPortrait

Let's take a look at the processing of one example FOV.

 # Get input images for one example FOV
 condition, cellline, FOV = conditions[0], cell_line_clones[0], FOVs[0]

 images = (
 input_images.filter(ulabels=condition)
 .filter(ulabels=cellline)
 .filter(ulabels=FOV)
 .distinct()
 )

 # Quick sanity check - all images should share metadata except channel/structure
 values_to_ignore = ["channel", "imaged structure"]
 features = images.first().features.get_values()
 shared_features = {k: v for k, v in features.items() if k not in values_to_ignore}

 for image in images:
 image_features = image.features.get_values()
 filtered_features = {
 k: v for k, v in image_features.items() if k not in values_to_ignore
 }
 assert shared_features == filtered_features

 # Get image paths in correct channel order
 input_image_paths = [
 images.filter(ulabels__name=channel).one().cache()
 for channel in ["DAPI", "Alexa488", "mCherry"]
 ]

 # Create output directory and unique project ID
 output_directory = "processed_data"
 unique_project_id = f"{shared_features['cell_line_clone']}/{shared_features['stimulation']}/{shared_features['FOV']}".replace(
 " ", "_"
 )
 project_location = f"{output_directory}/{unique_project_id}/scportrait_project"

 # Create directories
 Path(project_location).mkdir(parents=True, exist_ok=True)

 # Initialize the scPortrait project
 project = Project(
 project_location=project_location,
 config_path=config_file_af.cache(),
 segmentation_f=CytosolSegmentationCellpose,
 extraction_f=HDF5CellExtraction,
 overwrite=True,
 )

 # Load images and process
 project.load_input_from_tif_files(
 input_image_paths, overwrite=True, channel_names=["DAPI", "Alexa488", "mCherry"]
 )
 project.segment()
 project.extract()

Let's look at the input images we processed.

 project.plot_input_image()

Now we can look at the results generated by scPortrait. First, the
segmentation masks.

 project.plot_segmentation_masks()

And then extraction results consisting of individual single-cell
images over all of the channels.

 project.plot_single_cell_images()

# Save and annotate results

Now we also want to save these results to the instance.

 ln.Artifact.from_spatialdata(
 sdata=project.filehandler.get_sdata(),
 description="scportrait spatialdata object containing results of cells stained for autophagy markers",
 key=f"processed_data_imaging_use_case/{unique_project_id}/spatialdata.zarr",
 ).save()

 # Define schemas for single-cell image dataset
 schemas = {
 "obs": ln.Schema(
 name="single-cell image dataset schema obs",
 features=[
 ln.Feature(name="scportrait_cell_id", dtype="int", coerce_dtype=True).save()
 ],
 ).save(),
 "uns": ln.Schema(
 name="single-cell image dataset schema uns",
 features=[ln.Feature(name="single_cell_images", dtype=dict).save()],
 ).save(),
 }

 # Create composite schema
 h5sc_schema = ln.Schema(
 name="single-cell image dataset",
 otype="AnnData",
 slots=schemas,
 ).save()

 # Curate the AnnData object
 curator = ln.curators.AnnDataCurator(project.h5sc, h5sc_schema)
 curator.validate()

 # Save artifact with annotations
 artifact = curator.save_artifact(
 key=f"processed_data_imaging_use_case/{unique_project_id}/single_cell_data.h5ad"
 )

 # Add metadata and labels
 annotation = shared_features.copy()
 annotation["imaged structure"] = [
 ln.ULabel.connect("scportrait/examples").get(name=name)
 for name in ["LckLip-mNeon", "DNA", "mCherry-LC3B"]
 ]

 artifact.features.add_values(annotation)
 artifact.labels.add(ln.ULabel(name="scportrait single-cell images").save())

To process all files in our dataset efficiently, we'll create a custom
image processing function. We decorate this function with "tracked()"
to track data lineage of the input and output artifacts.

The function will skip files that have already been processed and
uploaded, improving processing time by avoiding redundant
computations.

 @ln.tracked()
 def process_images(
 config_file_af: ln.Artifact,
 input_artifacts: Iterable[ln.Artifact],
 h5sc_schema: ln.Schema,
 output_directory: str,
 ) -> None:
 # Quick sanity check - all images should share metadata except channel/structure
 values_to_ignore = ["channel", "imaged structure"]
 first_features = input_artifacts.first().features.get_values()
 shared_features = {
 k: v for k, v in first_features.items() if k not in values_to_ignore
 }

 for artifact in input_artifacts:
 artifact_features = artifact.features.get_values()
 filtered_features = {
 k: v for k, v in artifact_features.items() if k not in values_to_ignore
 }
 assert shared_features == filtered_features

 # Create a unique project ID
 unique_project_id = f"{shared_features['cell_line_clone']}/{shared_features['stimulation']}/{shared_features['FOV']}".replace(
 " ", "_"
 )

 # Check if already processed
 base_key = f"processed_data_imaging_use_case/{unique_project_id}"
 try:
 ln.Artifact.connect("scportrait/examples").get(
 key=f"{base_key}/single_cell_data.h5ad"
 )
 ln.Artifact.connect("scportrait/examples").get(
 key=f"{base_key}/spatialdata.zarr"
 )
 print("Dataset already processed. Skipping.")
 return
 except ln.Artifact.DoesNotExist:
 pass

 # Get image paths in channel order
 input_image_paths = [
 input_artifacts.filter(ulabels__name=channel).one().cache()
 for channel in ["DAPI", "Alexa488", "mCherry"]
 ]

 # Setup and process project
 project_location = f"{output_directory}/{unique_project_id}/scportrait_project"
 Path(project_location).mkdir(parents=True, exist_ok=True)

 project = Project(
 project_location=project_location,
 config_path=config_file_af.cache(),
 segmentation_f=CytosolSegmentationCellpose,
 extraction_f=HDF5CellExtraction,
 overwrite=True,
 )

 project.load_input_from_tif_files(
 input_image_paths, overwrite=True, channel_names=["DAPI", "Alexa488", "mCherry"]
 )
 project.segment()
 project.extract()

 # Save single-cell images
 curator = ln.curators.AnnDataCurator(project.h5sc, h5sc_schema)
 curator.validate()
 artifact = curator.save_artifact(key=f"{base_key}/single_cell_data.h5ad")

 annotation = shared_features.copy()
 annotation["imaged structure"] = [
 ln.ULabel.connect("scportrait/examples").get(name=name)
 for name in ["LckLip-mNeon", "DNA", "mCherry-LC3B"]
 ]

 artifact.features.add_values(annotation)
 artifact.labels.add(ln.ULabel.get(name="scportrait single-cell images"))

 # Save SpatialData object
 ln.Artifact.from_spatialdata(
 sdata=project.filehandler.get_sdata(),
 description="scportrait spatialdata object containing results of cells stained for autophagy markers",
 key=f"{base_key}/spatialdata.zarr",
 ).save()

 ln.Param(name="output_directory", dtype="str").save()

Now we are ready to process all of our input images and upload the
generated single-cell image datasets back to our instance.

 for condition in conditions:
 for cellline in cell_line_clones:
 for FOV in FOVs:
 images = (
 input_images.filter(ulabels=condition)
 .filter(ulabels=cellline)
 .filter(ulabels=FOV)
 .distinct()
 )

 if images:
 process_images(
 config_file_af,
 input_artifacts=images,
 h5sc_schema=h5sc_schema,
 output_directory=output_directory,
 )

 example_artifact = ln.Artifact.filter(
 ulabels=ln.ULabel.get(name="scportrait single-cell images")
 ).first()

 example_artifact.view_lineage()

 ln.finish()