
Commit: new doc page

johnkerl committed Sep 10, 2023
1 parent 0f27a39 commit 736a326
Showing 5 changed files with 383 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/mkdocs.yml
@@ -109,6 +109,7 @@ nav:
- "Auxiliary commands": "reference-main-auxiliary-commands.md"
- "Manual page": "manpage.md"
- "Building from source": "build.md"
- "Miller as a library": "miller-as-library.md"
- "How to create a new release": "how-to-release.md"
- "Documents for previous releases": "release-docs.md"
- "Glossary": "glossary.md"
202 changes: 202 additions & 0 deletions docs/src/miller-as-library.md
@@ -0,0 +1,202 @@
<!--- PLEASE DO NOT EDIT DIRECTLY. EDIT THE .md.in FILE PLEASE. --->
<div>
<span class="quicklinks">
Quick links:
&nbsp;
<a class="quicklink" href="../reference-main-flag-list/index.html">Flags</a>
&nbsp;
<a class="quicklink" href="../reference-verbs/index.html">Verbs</a>
&nbsp;
<a class="quicklink" href="../reference-dsl-builtin-functions/index.html">Functions</a>
&nbsp;
<a class="quicklink" href="../glossary/index.html">Glossary</a>
&nbsp;
<a class="quicklink" href="../release-docs/index.html">Release docs</a>
</span>
</div>
# Miller as a library

Initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source
code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects.

## Setup

```
$ mkdir use-mlr
$ cd use-mlr
$ go mod init github.com/johnkerl/miller-library-example
go: creating new go.mod: module github.com/johnkerl/miller-library-example
# One of:
$ go get github.com/johnkerl/miller
$ go get github.com/johnkerl/miller@0f27a39a9f92d4c633dd29d99ad203e95a484dd3
# Etc.
$ go mod tidy
```
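
For reference, the resulting `go.mod` will look something like the sketch below. The `go` directive and the Miller version line here are illustrative assumptions --- `go mod init`, `go get`, and `go mod tidy` write the real values for your toolchain and for whichever release or commit you fetched:

```
module github.com/johnkerl/miller-library-example

go 1.21

require github.com/johnkerl/miller v6.9.1 // illustrative; go get / go mod tidy pin the actual version or pseudo-version
```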

## One example use

<pre class="pre-non-highlight-non-pair">
package main

import (
"fmt"

"github.com/johnkerl/miller/pkg/bifs"
"github.com/johnkerl/miller/pkg/mlrval"
)

func main() {
a := mlrval.FromInt(2)
b := mlrval.FromInt(60)
c := bifs.BIF_pow(a, b)
fmt.Println(c.String())
}
</pre>

```
$ go build main1.go
$ ./main1
1152921504606846976
```

Or simply:
```
$ go run main1.go
1152921504606846976
```
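
As a small variation --- a sketch using only the two calls already shown above, `mlrval.FromInt` and `bifs.BIF_pow`, with nothing else assumed about the API --- the same BIF can be applied in a loop:

<pre class="pre-non-highlight-non-pair">
package main

import (
	"fmt"

	"github.com/johnkerl/miller/pkg/bifs"
	"github.com/johnkerl/miller/pkg/mlrval"
)

func main() {
	base := mlrval.FromInt(2)
	// Compute and print 2^k for a few exponents, reusing the same BIF as main1.go.
	for _, k := range []int64{10, 20, 30} {
		exponent := mlrval.FromInt(k)
		power := bifs.BIF_pow(base, exponent)
		fmt.Printf("2^%d = %s\n", k, power.String())
	}
}
</pre>

Built and run the same way as `main1.go`, this prints 2^10, 2^20, and 2^30, one per line.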

## Another example use

<pre class="pre-non-highlight-non-pair">
package main

import (
"bufio"
"container/list"
"errors"
"fmt"
"os"

"github.com/johnkerl/miller/pkg/cli"
"github.com/johnkerl/miller/pkg/input"
"github.com/johnkerl/miller/pkg/output"
"github.com/johnkerl/miller/pkg/transformers"
"github.com/johnkerl/miller/pkg/types"
)

func convert_csv_to_json(fileNames []string) error {
options := &cli.TOptions{
ReaderOptions: cli.TReaderOptions{
InputFileFormat: "csv",
IFS: ",",
IRS: "\n",
RecordsPerBatch: 1,
},
WriterOptions: cli.TWriterOptions{
OutputFileFormat: "json",
},
}
outputStream := os.Stdout
outputIsStdout := true

// Since Go is concurrent, the context struct needs to be duplicated and
// passed through the channels along with each record.
initialContext := types.NewContext()

// Instantiate the record-reader.
// RecordsPerBatch is tracked separately from ReaderOptions since join/repl
// may use batch size of 1.
recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch)
if err != nil {
return err
}

// Instantiate the record-writer
recordWriter, err := output.Create(&options.WriterOptions)
if err != nil {
return err
}

cat, err := transformers.NewTransformerCat(
false, // doCounters bool,
"", // counterFieldName string,
nil, // groupByFieldNames []string,
false, // doFileName bool,
false, // doFileNum bool,
)
if err != nil {
return err
}
recordTransformers := []transformers.IRecordTransformer{cat}

// Set up the reader-to-transformer and transformer-to-writer channels.
readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext

// We're done when a fatal error is registered on input (file not found,
// etc) or when the record-writer has written all its output. We use
// channels to communicate both of these conditions.
inputErrorChannel := make(chan error, 1)
doneWritingChannel := make(chan bool, 1)
dataProcessingErrorChannel := make(chan bool, 1)

readerDownstreamDoneChannel := make(chan bool, 1)

// Start the reader, transformer, and writer. Let them run until fatal input
// error or end-of-processing happens.
bufferedOutputStream := bufio.NewWriter(outputStream)

go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
writerChannel, options)
go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)

var retval error
done := false
for !done {
select {
case ierr := &lt;-inputErrorChannel:
retval = ierr
break
case _ = &lt;-dataProcessingErrorChannel:
retval = errors.New("exiting due to data error") // details already printed
break
case _ = &lt;-doneWritingChannel:
done = true
break
}
}

bufferedOutputStream.Flush()

return retval
}

func main() {
err := convert_csv_to_json(os.Args[1:])
if err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
}
}
</pre>

<pre class="pre-non-highlight-non-pair">
host,status
apoapsis.east.our.org,up
nadir.west.our.org,down
</pre>

```
$ go build main2.go
$ ./main2 data/hostnames.csv
{"host": "apoapsis.east.our.org", "status": "up"}
{"host": "nadir.west.our.org", "status": "down"}
```
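
To emit a different output format, only the options struct needs to change. The helper below is a hypothetical sketch, not part of Miller's API; it assumes, as the `"csv"` and `"json"` values above suggest, that `InputFileFormat` and `OutputFileFormat` take the same format names used on the `mlr` command line:

<pre class="pre-non-highlight-non-pair">
package example

import (
	"github.com/johnkerl/miller/pkg/cli"
)

// makeOptions builds the same TOptions as in main2.go, but lets the caller
// choose the output format, e.g. "json" or "tsv".
func makeOptions(outputFileFormat string) *cli.TOptions {
	return &cli.TOptions{
		ReaderOptions: cli.TReaderOptions{
			InputFileFormat: "csv",
			IFS:             ",",
			IRS:             "\n",
			RecordsPerBatch: 1,
		},
		WriterOptions: cli.TWriterOptions{
			OutputFileFormat: outputFileFormat,
		},
	}
}
</pre>

With a helper like this, the body of `convert_csv_to_json` could call it in place of the inline options literal and leave everything from the reader through the channel plumbing unchanged.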



54 changes: 54 additions & 0 deletions docs/src/miller-as-library.md.in
@@ -0,0 +1,54 @@
# Miller as a library

Initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source
code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects.

## Setup

```
$ mkdir use-mlr

$ cd use-mlr

$ go mod init github.com/johnkerl/miller-library-example
go: creating new go.mod: module github.com/johnkerl/miller-library-example

# One of:
$ go get github.com/johnkerl/miller
$ go get github.com/johnkerl/miller@0f27a39a9f92d4c633dd29d99ad203e95a484dd3
# Etc.

$ go mod tidy
```

## One example use

GENMD-INCLUDE-ESCAPED(miller-as-library/main1.go)

```
$ go build main1.go
$ ./main1
1152921504606846976
```

Or simply:
```
$ go run main1.go
1152921504606846976
```

## Another example use

GENMD-INCLUDE-ESCAPED(miller-as-library/main2.go)

GENMD-INCLUDE-ESCAPED(data/hostnames.csv)

```
$ go build main2.go
$ ./main2 data/hostnames.csv
{"host": "apoapsis.east.our.org", "status": "up"}
{"host": "nadir.west.our.org", "status": "down"}
```



15 changes: 15 additions & 0 deletions docs/src/miller-as-library/main1.go
@@ -0,0 +1,15 @@
package main

import (
"fmt"

"github.com/johnkerl/miller/pkg/bifs"
"github.com/johnkerl/miller/pkg/mlrval"
)

func main() {
a := mlrval.FromInt(2)
b := mlrval.FromInt(60)
c := bifs.BIF_pow(a, b)
fmt.Println(c.String())
}
111 changes: 111 additions & 0 deletions docs/src/miller-as-library/main2.go
@@ -0,0 +1,111 @@
package main

import (
"bufio"
"container/list"
"errors"
"fmt"
"os"

"github.com/johnkerl/miller/pkg/cli"
"github.com/johnkerl/miller/pkg/input"
"github.com/johnkerl/miller/pkg/output"
"github.com/johnkerl/miller/pkg/transformers"
"github.com/johnkerl/miller/pkg/types"
)

func convert_csv_to_json(fileNames []string) error {
options := &cli.TOptions{
ReaderOptions: cli.TReaderOptions{
InputFileFormat: "csv",
IFS: ",",
IRS: "\n",
RecordsPerBatch: 1,
},
WriterOptions: cli.TWriterOptions{
OutputFileFormat: "json",
},
}
outputStream := os.Stdout
outputIsStdout := true

// Since Go is concurrent, the context struct needs to be duplicated and
// passed through the channels along with each record.
initialContext := types.NewContext()

// Instantiate the record-reader.
// RecordsPerBatch is tracked separately from ReaderOptions since join/repl
// may use batch size of 1.
recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch)
if err != nil {
return err
}

// Instantiate the record-writer
recordWriter, err := output.Create(&options.WriterOptions)
if err != nil {
return err
}

cat, err := transformers.NewTransformerCat(
false, // doCounters bool,
"", // counterFieldName string,
nil, // groupByFieldNames []string,
false, // doFileName bool,
false, // doFileNum bool,
)
if err != nil {
return err
}
recordTransformers := []transformers.IRecordTransformer{cat}

// Set up the reader-to-transformer and transformer-to-writer channels.
readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext

// We're done when a fatal error is registered on input (file not found,
// etc) or when the record-writer has written all its output. We use
// channels to communicate both of these conditions.
inputErrorChannel := make(chan error, 1)
doneWritingChannel := make(chan bool, 1)
dataProcessingErrorChannel := make(chan bool, 1)

readerDownstreamDoneChannel := make(chan bool, 1)

// Start the reader, transformer, and writer. Let them run until fatal input
// error or end-of-processing happens.
bufferedOutputStream := bufio.NewWriter(outputStream)

go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
writerChannel, options)
go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)

var retval error
done := false
for !done {
select {
case ierr := <-inputErrorChannel:
retval = ierr
break
case _ = <-dataProcessingErrorChannel:
retval = errors.New("exiting due to data error") // details already printed
break
case _ = <-doneWritingChannel:
done = true
break
}
}

bufferedOutputStream.Flush()

return retval
}

func main() {
err := convert_csv_to_json(os.Args[1:])
if err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
}
}
