graph/encoding/dot: (un)quote attributes if needed during (un)marshal

This commit is contained in:
Robin Eklind
2019-01-14 04:57:56 +01:00
committed by Dan Kortschak
parent 9b1d387736
commit 24f0d081ca
5 changed files with 288 additions and 74 deletions

View File

@@ -6,6 +6,8 @@ package dot
import (
"fmt"
"strconv"
"strings"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/encoding"
@@ -41,6 +43,10 @@ type PortSetter interface {
// Unmarshal parses the Graphviz DOT-encoded data and stores the result in dst.
// If the number of graphs encoded in data is not one, an error is returned and
// dst will hold the first graph in data.
//
// Attributes and IDs are quoted if needed during marshaling, to conform with
// valid DOT syntax. Quoted IDs and attributes are unquoted during unmarshaling,
// so the data is kept in raw form.
func Unmarshal(data []byte, dst encoding.Builder) error {
file, err := dot.ParseBytes(data)
if err != nil {
@@ -57,6 +63,10 @@ func Unmarshal(data []byte, dst encoding.Builder) error {
// stores the result in dst.
// If the number of graphs encoded in data is not one, an error is returned and
// dst will hold the first graph in data.
//
// Attributes and IDs are quoted if needed during marshaling, to conform with
// valid DOT syntax. Quoted IDs and attributes are unquoted during unmarshaling,
// so the data is kept in raw form.
func UnmarshalMulti(data []byte, dst encoding.MultiBuilder) error {
file, err := dot.ParseBytes(data)
if err != nil {
@@ -88,7 +98,7 @@ func copyGraph(dst encoding.Builder, src *ast.Graph) (err error) {
},
}
if dst, ok := dst.(DOTIDSetter); ok {
dst.SetDOTID(src.ID)
dst.SetDOTID(unquoteID(src.ID))
}
if a, ok := dst.(AttributeSetters); ok {
gen.graphAttr, gen.nodeAttr, gen.edgeAttr = a.DOTAttributeSetters()
@@ -118,7 +128,7 @@ func copyMultigraph(dst encoding.MultiBuilder, src *ast.Graph) (err error) {
},
}
if dst, ok := dst.(DOTIDSetter); ok {
dst.SetDOTID(src.ID)
dst.SetDOTID(unquoteID(src.ID))
}
if a, ok := dst.(AttributeSetters); ok {
gen.graphAttr, gen.nodeAttr, gen.edgeAttr = a.DOTAttributeSetters()
@@ -154,7 +164,7 @@ func (gen *generator) node(dst graph.NodeAdder, id string) graph.Node {
}
n := dst.NewNode()
if n, ok := n.(DOTIDSetter); ok {
n.SetDOTID(id)
n.SetDOTID(unquoteID(id))
}
dst.AddNode(n)
gen.ids[id] = n
@@ -180,8 +190,8 @@ func (gen *simpleGraph) addStmt(dst encoding.Builder, stmt ast.Stmt) {
}
for _, attr := range stmt.Attrs {
a := encoding.Attribute{
Key: attr.Key,
Value: attr.Val,
Key: unquoteID(attr.Key),
Value: unquoteID(attr.Val),
}
if err := n.SetAttribute(a); err != nil {
panic(fmt.Errorf("unable to unmarshal node DOT attribute (%s=%s): %v", a.Key, a.Value, err))
@@ -216,8 +226,8 @@ func (gen *simpleGraph) addStmt(dst encoding.Builder, stmt ast.Stmt) {
}
for _, attr := range stmt.Attrs {
a := encoding.Attribute{
Key: attr.Key,
Value: attr.Val,
Key: unquoteID(attr.Key),
Value: unquoteID(attr.Val),
}
if err := n.SetAttribute(a); err != nil {
panic(fmt.Errorf("unable to unmarshal global %s DOT attribute (%s=%s): %v", dst, a.Key, a.Value, err))
@@ -240,7 +250,7 @@ func applyPortsToEdge(from ast.Vertex, to *ast.Edge, edge graph.Edge) {
if ps, isPortSetter := edge.(PortSetter); isPortSetter {
if n, vertexIsNode := from.(*ast.Node); vertexIsNode {
if n.Port != nil {
err := ps.SetFromPort(n.Port.ID, n.Port.CompassPoint.String())
err := ps.SetFromPort(unquoteID(n.Port.ID), n.Port.CompassPoint.String())
if err != nil {
panic(fmt.Errorf("unable to unmarshal edge port (:%s:%s)", n.Port.ID, n.Port.CompassPoint.String()))
}
@@ -249,7 +259,7 @@ func applyPortsToEdge(from ast.Vertex, to *ast.Edge, edge graph.Edge) {
if n, vertexIsNode := to.Vertex.(*ast.Node); vertexIsNode {
if n.Port != nil {
err := ps.SetToPort(n.Port.ID, n.Port.CompassPoint.String())
err := ps.SetToPort(unquoteID(n.Port.ID), n.Port.CompassPoint.String())
if err != nil {
panic(fmt.Errorf("unable to unmarshal edge DOT port (:%s:%s)", n.Port.ID, n.Port.CompassPoint.String()))
}
@@ -372,8 +382,8 @@ func (gen *multiGraph) addStmt(dst encoding.MultiBuilder, stmt ast.Stmt) {
}
for _, attr := range stmt.Attrs {
a := encoding.Attribute{
Key: attr.Key,
Value: attr.Val,
Key: unquoteID(attr.Key),
Value: unquoteID(attr.Val),
}
if err := n.SetAttribute(a); err != nil {
panic(fmt.Errorf("unable to unmarshal node DOT attribute (%s=%s): %v", a.Key, a.Value, err))
@@ -408,8 +418,8 @@ func (gen *multiGraph) addStmt(dst encoding.MultiBuilder, stmt ast.Stmt) {
}
for _, attr := range stmt.Attrs {
a := encoding.Attribute{
Key: attr.Key,
Value: attr.Val,
Key: unquoteID(attr.Key),
Value: unquoteID(attr.Val),
}
if err := n.SetAttribute(a); err != nil {
panic(fmt.Errorf("unable to unmarshal global %s DOT attribute (%s=%s): %v", dst, a.Key, a.Value, err))
@@ -485,11 +495,30 @@ func addEdgeAttrs(edge graph.Edge, attrs []*ast.Attr) {
}
for _, attr := range attrs {
a := encoding.Attribute{
Key: attr.Key,
Value: attr.Val,
Key: unquoteID(attr.Key),
Value: unquoteID(attr.Val),
}
if err := e.SetAttribute(a); err != nil {
panic(fmt.Errorf("unable to unmarshal edge DOT attribute (%s=%s): %v", a.Key, a.Value, err))
}
}
}
// unquoteID unquotes the given string if needed in the context of an ID.
//
// Only double-quoted strings are considered quoted, since that is the only
// quoting form of the DOT language. Any other input, including Go-style
// single-quoted or backquoted literals, is returned unchanged.
//
// A double-quoted string is returned unchanged, quotes included, in two cases:
// when it wraps an HTML-like string (`"<...>"`), and when its contents cannot
// be decoded by strconv.Unquote (for example DOT-specific escapes such as \N
// or \l).
func unquoteID(s string) string {
	// strconv.Unquote also accepts 'c' and `raw` Go literals; neither is a
	// quoted DOT ID, so bail out unless s is wrapped in double quotes.
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return s
	}
	// To make round-trips idempotent, don't unquote quoted HTML-like strings
	//
	//    /^"<.*>"$/
	if len(s) >= 4 && strings.HasPrefix(s, `"<`) && strings.HasSuffix(s, `>"`) {
		return s
	}
	// Unquote quoted string if possible.
	if t, err := strconv.Unquote(s); err == nil {
		return t
	}
	// On error, s is quoted but contains sequences that are not valid Go
	// escapes; return the original string rather than panicking.
	return s
}

View File

@@ -43,6 +43,14 @@ func TestRoundTrip(t *testing.T) {
want: undirectedWithPorts,
directed: false,
},
{
want: directedAttrs,
directed: true,
},
{
want: undirectedAttrs,
directed: false,
},
}
for i, g := range golden {
var dst encoding.Builder
@@ -165,6 +173,46 @@ const undirectedWithPorts = `strict graph {
E:_ -- F:c;
}`
// directedAttrs is a golden DOT document used by TestRoundTrip (directed
// case). It declares global node and edge attribute statements and gives the
// nodes and the edge label attributes in several value forms: a quoted
// string, an HTML string, a negative numeral and a decimal numeral.
const directedAttrs = `strict digraph {
node [
shape=circle
style=filled
label="NODE"
];
edge [
penwidth=5
color=gray
label=3.14
];
// Node definitions.
A [label=<br>];
B [label=-14];
// Edge definitions.
A -> B [label="hello world"];
}`
// undirectedAttrs is a golden DOT document used by TestRoundTrip (undirected
// case). It declares global node and edge attribute statements and gives the
// nodes and the edge label attributes in several value forms: a quoted
// string, an HTML string, a negative numeral and a decimal numeral.
const undirectedAttrs = `strict graph {
node [
shape=circle
style=filled
label="NODE"
];
edge [
penwidth=5
color=gray
label=3.14
];
// Node definitions.
A [label=<br>];
B [label=-14];
// Edge definitions.
A -- B [label="hello world"];
}`
func TestChainedEdgeAttributes(t *testing.T) {
golden := []struct {
in, want string

View File

@@ -8,7 +8,9 @@ import (
"bytes"
"errors"
"fmt"
"regexp"
"sort"
"strconv"
"strings"
"gonum.org/v1/gonum/graph"
@@ -82,15 +84,18 @@ type MultiSubgrapher interface {
Subgraph() graph.Multigraph
}
// Marshal returns the DOT encoding for the graph g, applying the prefix
// and indent to the encoding. Name is used to specify the graph name. If
// name is empty and g implements Graph, the returned string from DOTID
// will be used.
// Marshal returns the DOT encoding for the graph g, applying the prefix and
// indent to the encoding. Name is used to specify the graph name. If name is
// empty and g implements Graph, the returned string from DOTID will be used.
//
// Graph serialization will work for a graph.Graph without modification,
// however, advanced GraphViz DOT features provided by Marshal depend on
// implementation of the Node, Attributer, Porter, Attributers, Structurer,
// Subgrapher and Graph interfaces.
//
// Attributes and IDs are quoted if needed during marshaling, to conform with
// valid DOT syntax. Quoted IDs and attributes are unquoted during unmarshaling,
// so the data is kept in raw form.
func Marshal(g graph.Graph, name, prefix, indent string) ([]byte, error) {
var p simpleGraphPrinter
p.indent = indent
@@ -105,14 +110,17 @@ func Marshal(g graph.Graph, name, prefix, indent string) ([]byte, error) {
// MarshalMulti returns the DOT encoding for the multigraph g, applying the
// prefix and indent to the encoding. Name is used to specify the graph name. If
// name is empty and g implements Graph, the returned string from DOTID
// will be used. If strict is true the output bytes will be prefixed with
// the DOT "strict" keyword.
// name is empty and g implements Graph, the returned string from DOTID will be
// used.
//
// Graph serialization will work for a graph.Multigraph without modification,
// however, advanced GraphViz DOT features provided by Marshal depend on
// implementation of the Node, Attributer, Porter, Attributers, Structurer,
// MultiSubgrapher and Multigraph interfaces.
//
// Attributes and IDs are quoted if needed during marshaling, to conform with
// valid DOT syntax. Quoted IDs and attributes are unquoted during unmarshaling,
// so the data is kept in raw form.
func MarshalMulti(g graph.Multigraph, name, prefix, indent string) ([]byte, error) {
var p multiGraphPrinter
p.indent = indent
@@ -308,7 +316,7 @@ func (p *printer) printFrontMatter(name string, needsIndent, isSubgraph, isDirec
if name != "" {
p.buf.WriteByte(' ')
p.buf.WriteString(name)
p.buf.WriteString(quoteID(name))
}
p.openBlock(" {")
@@ -316,13 +324,13 @@ func (p *printer) printFrontMatter(name string, needsIndent, isSubgraph, isDirec
}
func (p *printer) writeNode(n graph.Node) {
p.buf.WriteString(nodeID(n))
p.buf.WriteString(quoteID(nodeID(n)))
}
func (p *printer) writePorts(port, cp string) {
if port != "" {
p.buf.WriteByte(':')
p.buf.WriteString(port)
p.buf.WriteString(quoteID(port))
}
if cp != "" {
p.buf.WriteByte(':')
@@ -354,17 +362,17 @@ func (p *printer) writeAttributeList(a encoding.Attributer) {
case 0:
case 1:
p.buf.WriteString(" [")
p.buf.WriteString(attributes[0].Key)
p.buf.WriteString(quoteID(attributes[0].Key))
p.buf.WriteByte('=')
p.buf.WriteString(attributes[0].Value)
p.buf.WriteString(quoteID(attributes[0].Value))
p.buf.WriteString("]")
default:
p.openBlock(" [")
for _, att := range attributes {
p.newline()
p.buf.WriteString(att.Key)
p.buf.WriteString(quoteID(att.Key))
p.buf.WriteByte('=')
p.buf.WriteString(att.Value)
p.buf.WriteString(quoteID(att.Value))
}
p.closeBlock("]")
}
@@ -388,9 +396,9 @@ func (p *printer) writeAttributeComplex(ca Attributers) {
p.openBlock(" [")
for _, att := range attributes {
p.newline()
p.buf.WriteString(att.Key)
p.buf.WriteString(quoteID(att.Key))
p.buf.WriteByte('=')
p.buf.WriteString(att.Value)
p.buf.WriteString(quoteID(att.Value))
}
p.closeBlock("]")
haveWrittenBlock = true
@@ -580,3 +588,74 @@ func (p *multiGraphPrinter) print(g graph.Multigraph, name string, needsIndent,
return nil
}
// quoteID returns s in a form that can be written as an ID in DOT output.
//
// s is returned unchanged when it is not a DOT keyword and isID reports it
// as an ID. In every other case the result of strconv.Quote(s) is returned;
// this covers keywords, which may only be used as IDs when quoted, and
// strings that are not IDs, such as those containing spaces.
func quoteID(s string) string {
	if !isKeyword(s) && isID(s) {
		return s
	}
	return strconv.Quote(s)
}
// isKeyword reports whether s is one of the DOT language keywords (node,
// edge, graph, digraph, subgraph, strict). The comparison is done with
// strings.EqualFold, so letter case is ignored.
//
// ref: https://www.graphviz.org/doc/info/lang.html
func isKeyword(s string) bool {
	switch {
	case strings.EqualFold(s, "node"),
		strings.EqualFold(s, "edge"),
		strings.EqualFold(s, "graph"),
		strings.EqualFold(s, "digraph"),
		strings.EqualFold(s, "subgraph"),
		strings.EqualFold(s, "strict"):
		return true
	}
	return false
}
// FIXME: see if we can rewrite this in another way to remove our regexp
// dependency.

// Regular expressions matching identifier and numeral IDs.
var (
	reIdent   = regexp.MustCompile(`^[a-zA-Z\200-\377_][0-9a-zA-Z\200-\377_]*$`)
	reNumeral = regexp.MustCompile(`^[-]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)$`)
)

// isID reports whether the given string is an ID.
//
// An ID is one of the following:
//
//  1. Any string of alphabetic ([a-zA-Z\200-\377]) characters, underscores ('_')
//     or digits ([0-9]), not beginning with a digit;
//  2. a numeral [-]?(.[0-9]+ | [0-9]+(.[0-9]*)? );
//  3. any double-quoted string ("...") possibly containing escaped quotes (\");
//  4. an HTML string (<...>).
func isID(s string) bool {
	switch {
	case reIdent.MatchString(s):
		// 1. an identifier.
		return true
	case reNumeral.MatchString(s):
		// 2. a numeral.
		return true
	case isQuotedID(s):
		// 3. double-quote string ID.
		return true
	}
	// 4. HTML ID.
	return isHTMLID(s)
}

// isQuotedID reports whether s is a well-formed DOT double-quoted string: it
// starts and ends with a double quote, no unescaped double quote appears in
// between, and the closing quote is not itself escaped by a backslash.
//
// The check deliberately follows DOT rather than Go escape rules. Sequences
// such as \N, \l or \G are not valid Go escapes but are ordinary content of a
// DOT string; rejecting them would make the caller quote an already quoted
// string a second time.
func isQuotedID(s string) bool {
	if len(s) < 2 || !strings.HasPrefix(s, `"`) || !strings.HasSuffix(s, `"`) {
		return false
	}
	body := s[1 : len(s)-1]
	for i := 0; i < len(body); i++ {
		switch body[i] {
		case '\\':
			// A backslash consumes the following byte. If there is none,
			// the backslash escapes the closing quote and the string is
			// unterminated.
			i++
			if i == len(body) {
				return false
			}
		case '"':
			// Unescaped quote inside the string.
			return false
		}
	}
	return true
}

// isHTMLID reports whether the given string is an HTML ID.
func isHTMLID(s string) bool {
	// HTML IDs have the format /^<.*>$/
	return len(s) >= 2 && strings.HasPrefix(s, "<") && strings.HasSuffix(s, ">")
}

View File

@@ -950,6 +950,64 @@ var encodeTests = []struct {
3 -- 4 [color=red];
}`,
},
{
g: undirectedEdgeAttrGraphFrom(powerMethodGraph, map[edge][]encoding.Attribute{
// label attribute not quoted and containing spaces.
{from: 0, to: 2}: {{Key: "label", Value: `hello world`}, {Key: "style", Value: "dashed"}},
{from: 2, to: 4}: {},
{from: 3, to: 4}: {{Key: "label", Value: `foo bar`}},
}),
want: `strict graph {
// Node definitions.
0;
1;
2;
3;
4;
// Edge definitions.
0 -- 1;
0 -- 2 [
label="hello world"
style=dashed
];
0 -- 4;
1 -- 3;
2 -- 3;
2 -- 4;
3 -- 4 [label="foo bar"];
}`,
},
{
g: undirectedEdgeAttrGraphFrom(powerMethodGraph, map[edge][]encoding.Attribute{
// keywords must be quoted if used as attributes.
{from: 0, to: 2}: {{Key: "label", Value: `NODE`}, {Key: "style", Value: "dashed"}},
{from: 2, to: 4}: {},
{from: 3, to: 4}: {{Key: "label", Value: `subgraph`}},
}),
want: `strict graph {
// Node definitions.
0;
1;
2;
3;
4;
// Edge definitions.
0 -- 1;
0 -- 2 [
label="NODE"
style=dashed
];
0 -- 4;
1 -- 3;
2 -- 3;
2 -- 4;
3 -- 4 [label="subgraph"];
}`,
},
// Handling nodes with ports.
{

View File

@@ -34,33 +34,33 @@ var decodeTests = []struct {
},
wantDOT: `strict digraph {
// Node definitions.
0x8a10d5a2611fd03f [name="Richard Marquand"];
0xa3cff1a4c3ef3bb6 [
"0x8a10d5a2611fd03f" [name="Richard Marquand"];
"0xa3cff1a4c3ef3bb6" [
name="Star Wars: Episode V - The Empire Strikes Back"
release_date=1980-05-21T00:00:00Z
release_date="1980-05-21T00:00:00Z"
revenue=534000000
running_time=124
];
0xb39aa14d66aedad5 [
"0xb39aa14d66aedad5" [
name="Star Wars: Episode VI - Return of the Jedi"
release_date=1983-05-25T00:00:00Z
release_date="1983-05-25T00:00:00Z"
revenue=572000000
running_time=131
];
0x0312de17a7ee89f9 [name="Luke Skywalker"];
0x3da8d1dcab1bb381 [name="Han Solo"];
0x4a7d0b5fe91e78a4 [name="Irvin Kernshner"];
0x718337b9dcbaa7d9 [name="Princess Leia"];
"0x0312de17a7ee89f9" [name="Luke Skywalker"];
"0x3da8d1dcab1bb381" [name="Han Solo"];
"0x4a7d0b5fe91e78a4" [name="Irvin Kernshner"];
"0x718337b9dcbaa7d9" [name="Princess Leia"];
// Edge definitions.
0xa3cff1a4c3ef3bb6 -> 0x0312de17a7ee89f9 [label=starring];
0xa3cff1a4c3ef3bb6 -> 0x3da8d1dcab1bb381 [label=starring];
0xa3cff1a4c3ef3bb6 -> 0x4a7d0b5fe91e78a4 [label=director];
0xa3cff1a4c3ef3bb6 -> 0x718337b9dcbaa7d9 [label=starring];
0xb39aa14d66aedad5 -> 0x8a10d5a2611fd03f [label=director];
0xb39aa14d66aedad5 -> 0x0312de17a7ee89f9 [label=starring];
0xb39aa14d66aedad5 -> 0x3da8d1dcab1bb381 [label=starring];
0xb39aa14d66aedad5 -> 0x718337b9dcbaa7d9 [label=starring];
"0xa3cff1a4c3ef3bb6" -> "0x0312de17a7ee89f9" [label=starring];
"0xa3cff1a4c3ef3bb6" -> "0x3da8d1dcab1bb381" [label=starring];
"0xa3cff1a4c3ef3bb6" -> "0x4a7d0b5fe91e78a4" [label=director];
"0xa3cff1a4c3ef3bb6" -> "0x718337b9dcbaa7d9" [label=starring];
"0xb39aa14d66aedad5" -> "0x8a10d5a2611fd03f" [label=director];
"0xb39aa14d66aedad5" -> "0x0312de17a7ee89f9" [label=starring];
"0xb39aa14d66aedad5" -> "0x3da8d1dcab1bb381" [label=starring];
"0xb39aa14d66aedad5" -> "0x718337b9dcbaa7d9" [label=starring];
}`,
},
{
@@ -72,52 +72,52 @@ var decodeTests = []struct {
},
wantDOT: `strict digraph {
// Node definitions.
0x892a6da7ee1fbdec [
"0x892a6da7ee1fbdec" [
age=55
name=Sarah
];
0x99b74c1b5ab100ec [
"0x99b74c1b5ab100ec" [
age=35
name=Artyom
];
0xb9e12a67e34d6acc [
"0xb9e12a67e34d6acc" [
age=19
name=Catalina
];
0xbf104824c777525d [name=Perro];
0xf590a923ea1fccaa [name=Goldie];
0xf92d7dbe272d680b [name="Hyung Sin"];
0x0fd90205a458151f [
"0xbf104824c777525d" [name=Perro];
"0xf590a923ea1fccaa" [name=Goldie];
"0xf92d7dbe272d680b" [name="Hyung Sin"];
"0x0fd90205a458151f" [
age=39
name=Michael
];
0x37734fcf0a6fcc69 [name="Rammy the sheep"];
0x52a80955d40ec819 [
"0x37734fcf0a6fcc69" [name="Rammy the sheep"];
"0x52a80955d40ec819" [
age=35
name=Amit
];
0x5e9ad1cd9466228c [
"0x5e9ad1cd9466228c" [
age=24
name="Sang Hyun"
];
// Edge definitions.
0xb9e12a67e34d6acc -> 0xbf104824c777525d [label=owns_pet];
0xb9e12a67e34d6acc -> 0x5e9ad1cd9466228c [label=friend];
0xf92d7dbe272d680b -> 0x5e9ad1cd9466228c [label=friend];
0x0fd90205a458151f -> 0x892a6da7ee1fbdec [label=friend];
0x0fd90205a458151f -> 0x99b74c1b5ab100ec [label=friend];
0x0fd90205a458151f -> 0xb9e12a67e34d6acc [label=friend];
0x0fd90205a458151f -> 0x37734fcf0a6fcc69 [label=owns_pet];
0x0fd90205a458151f -> 0x52a80955d40ec819 [label=friend];
0x0fd90205a458151f -> 0x5e9ad1cd9466228c [label=friend];
0x52a80955d40ec819 -> 0x99b74c1b5ab100ec [label=friend];
0x52a80955d40ec819 -> 0x0fd90205a458151f [label=friend];
0x52a80955d40ec819 -> 0x5e9ad1cd9466228c [label=friend];
0x5e9ad1cd9466228c -> 0xb9e12a67e34d6acc [label=friend];
0x5e9ad1cd9466228c -> 0xf590a923ea1fccaa [label=owns_pet];
0x5e9ad1cd9466228c -> 0xf92d7dbe272d680b [label=friend];
0x5e9ad1cd9466228c -> 0x52a80955d40ec819 [label=friend];
"0xb9e12a67e34d6acc" -> "0xbf104824c777525d" [label=owns_pet];
"0xb9e12a67e34d6acc" -> "0x5e9ad1cd9466228c" [label=friend];
"0xf92d7dbe272d680b" -> "0x5e9ad1cd9466228c" [label=friend];
"0x0fd90205a458151f" -> "0x892a6da7ee1fbdec" [label=friend];
"0x0fd90205a458151f" -> "0x99b74c1b5ab100ec" [label=friend];
"0x0fd90205a458151f" -> "0xb9e12a67e34d6acc" [label=friend];
"0x0fd90205a458151f" -> "0x37734fcf0a6fcc69" [label=owns_pet];
"0x0fd90205a458151f" -> "0x52a80955d40ec819" [label=friend];
"0x0fd90205a458151f" -> "0x5e9ad1cd9466228c" [label=friend];
"0x52a80955d40ec819" -> "0x99b74c1b5ab100ec" [label=friend];
"0x52a80955d40ec819" -> "0x0fd90205a458151f" [label=friend];
"0x52a80955d40ec819" -> "0x5e9ad1cd9466228c" [label=friend];
"0x5e9ad1cd9466228c" -> "0xb9e12a67e34d6acc" [label=friend];
"0x5e9ad1cd9466228c" -> "0xf590a923ea1fccaa" [label=owns_pet];
"0x5e9ad1cd9466228c" -> "0xf92d7dbe272d680b" [label=friend];
"0x5e9ad1cd9466228c" -> "0x52a80955d40ec819" [label=friend];
}`,
},
{