% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/structure.R
\name{glycan_structure}
\alias{glycan_structure}
\alias{is_glycan_structure}
\title{Create a Glycan Structure Vector}
\usage{
glycan_structure(...)

is_glycan_structure(x)
}
\arguments{
\item{...}{igraph graph objects to be converted to glycan structures, or existing
glycan structure vectors. Supports mixed input of multiple objects.}

\item{x}{An object to test for being a glycan structure vector.}
}
\value{
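\code{glycan_structure()} returns a glycan structure vector of class
\code{glyrepr_structure}. \code{is_glycan_structure()} returns \code{TRUE} if \code{x}
is a glycan structure vector and \code{FALSE} otherwise.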
}
\description{
\code{glycan_structure()} creates an efficient glycan structure vector for storing and
processing glycan molecular structures. The function employs hash-based deduplication
mechanisms, making it suitable for glycoproteomics, glycomics analysis, and glycan
structure comparison studies.
}
\section{Core Features}{
\itemize{
\item \strong{Efficient Storage}: Uses hash values of IUPAC codes for deduplication,
avoiding redundant storage of identical glycan structures
\item \strong{Graph Model Representation}: Each glycan structure is represented as a directed
graph where nodes are monosaccharides and edges are glycosidic linkages
\item \strong{Vectorized Operations}: Supports R's vectorized operations for batch
processing of glycan data
\item \strong{Type Safety}: Built on the vctrs package, providing type-safe operations
}
}

\section{Data Structure Overview}{
A glycan structure vector is a vctrs record with an additional S3 class
\code{glyrepr_structure}. Therefore, \code{sloop::s3_class()} returns the class hierarchy
\code{c("glyrepr_structure", "vctrs_rcrd", "vctrs_vctr")}.

Each glycan structure must satisfy the following constraints:
\subsection{Graph Structure Requirements}{
\itemize{
\item Must be a directed graph with an outward tree structure (reducing end as root)
\item Must have a graph attribute \code{anomer} in the format "a1" or "b1"
\itemize{
\item Unknown parts can be represented with "?", e.g., "?1", "a?", "??"
}
}
}

\subsection{Node Attributes}{
\itemize{
\item \code{mono}: Monosaccharide names, must be known monosaccharide types
\itemize{
\item Generic names: Hex, HexNAc, dHex, NeuAc, etc.
\item Concrete names: Glc, Gal, Man, GlcNAc, etc.
\item Cannot mix generic and concrete names
\item NA values are not allowed
}
\item \code{sub}: Substituent information
\itemize{
\item Single substituent format: "xY" (x = position, Y = substituent name),
e.g., "2Ac", "3S"
\item Multiple substituents separated by commas and ordered by position,
e.g., "3Me,4Ac", "2S,6P"
\item No substituents represented by empty string ""
}
}
}

\subsection{Edge Attributes}{
\itemize{
\item \code{linkage}: Glycosidic linkage information in format "a/bX-Y"
\itemize{
\item Standard format: e.g., "b1-4", "a2-3"
\item Unknown positions allowed: "a1-?", "b?-3", "??-?"
\item Partially unknown positions: "a1-3/6", "a1-3/6/9"
\item NA values are not allowed
}
}
}
}

\section{Node and Edge Order}{
The indices of vertices and linkages in a glycan correspond directly to their
order in the IUPAC-condensed string, which is printed when you print a
\code{\link[=glycan_structure]{glycan_structure()}}.
For example, for the glycan \verb{Man(a1-3)[Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc(b1-},
the vertices are "Man", "Man", "Man", "GlcNAc", "GlcNAc",
and the linkages are "a1-3", "a1-6", "b1-4", "b1-4".
}

\section{Use Cases}{
\itemize{
\item \strong{Glycoproteomics Analysis}: Processing glycan structure information from
mass spectrometry data
\item \strong{Glycomics Research}: Comparing glycan expression profiles across different
samples or conditions
\item \strong{Structure-Function Analysis}: Studying relationships between glycan
structures and biological functions
\item \strong{Database Queries}: Performing structure matching and searches in glycan
databases
}
}

\examples{
library(igraph)

# Example 1: Build the disaccharide GlcNAc(b1-4)GlcNAc from an igraph object
graph <- make_graph(~ 1-+2)  # Directed edge from vertex 1 to vertex 2
V(graph)$mono <- c("GlcNAc", "GlcNAc")  # Monosaccharide name of each vertex
V(graph)$sub <- ""  # Empty string: no substituents on any vertex
E(graph)$linkage <- "b1-4"  # Glycosidic linkage stored on the edge
graph$anomer <- "a1"  # Graph-level anomer attribute (required)

# Create glycan structure vector
simple_struct <- glycan_structure(graph)
print(simple_struct)

# Example 2: Use predefined glycan core structures
n_core <- n_glycan_core()  # N-glycan core structure
o_core1 <- o_glycan_core_1()  # O-glycan Core 1 structure

# Create vector with multiple structures
multi_struct <- glycan_structure(n_core, o_core1)
print(multi_struct)

# Example 3: Create a longer structure that carries a substituent
complex_graph <- make_graph(~ 1-+2-+3)
V(complex_graph)$mono <- c("GlcNAc", "Gal", "Neu5Ac")
# One entry per vertex, in "xY" format (x = position, Y = substituent name);
# here the Gal residue carries substituent "S" at position 6.
V(complex_graph)$sub <- c("", "6S", "")
E(complex_graph)$linkage <- c("b1-4", "a2-3")  # One entry per edge
complex_graph$anomer <- "b1"

complex_struct <- glycan_structure(complex_graph)
print(complex_struct)

# Example 4: Check if object is a glycan structure
is_glycan_structure(simple_struct)  # TRUE
is_glycan_structure(graph)          # FALSE: a plain igraph object

# Example 5: Mix igraph objects and existing glycan structure vectors
mixed_struct <- glycan_structure(graph, o_glycan_core_2(), simple_struct)
print(mixed_struct)

}
