From ccefbb0f6c2ec8a4cd8c4dfdc9b7a9176c6f6a59 Mon Sep 17 00:00:00 2001 From: karthik kalarikal Date: Wed, 19 Mar 2025 21:46:01 +0530 Subject: [PATCH 1/6] initial commit --- go.mod | 3 +++ main.go | 5 +++++ 2 files changed, 8 insertions(+) create mode 100644 go.mod create mode 100644 main.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..648bb79 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/karthikkalarikal/Qube + +go 1.23.5 diff --git a/main.go b/main.go new file mode 100644 index 0000000..7905807 --- /dev/null +++ b/main.go @@ -0,0 +1,5 @@ +package main + +func main() { + +} From e571f2498ec18d1907f86b2e938228872e4cb281 Mon Sep 17 00:00:00 2001 From: karthik kalarikal Date: Thu, 20 Mar 2025 10:34:26 +0530 Subject: [PATCH 2/6] basic cli set up, models for actor, movie --- cmd/root.go | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ go.mod | 6 ++++++ go.sum | 10 +++++++++ main.go | 4 +++- models/model.go | 20 ++++++++++++++++++ pkg/util/util.go | 18 ++++++++++++++++ 6 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 cmd/root.go create mode 100644 go.sum create mode 100644 models/model.go create mode 100644 pkg/util/util.go diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..8040a6a --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,53 @@ +package cmd + +import ( + "fmt" + "log" + "os" + + "github.com/karthikkalarikal/Qube/models" + "github.com/karthikkalarikal/Qube/pkg/util" + "github.com/spf13/cobra" +) + +var ( + rootCmd = &cobra.Command{ + Use: "separation", + Short: "Connect your favorite celebrities.", + Long: `A cli application that let's you figure out the connection between actors by separation`, + Run: generate, + } + actor1 string + actor2 string + separation uint +) + +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func init() { + + rootCmd.PersistentFlags().StringVarP(&actor1, "actor1", "a", "", "name an actor/celeb") + 
rootCmd.PersistentFlags().StringVarP(&actor2, "actor2", "b", "", "name an actor/celeb") + rootCmd.PersistentFlags().UintVarP(&separation, "separation", "s", 3, "separation between the actors/celebs") + +} + +func generate(_ *cobra.Command, args []string) { + config := models.Config{ + Actor1: actor1, + Actor2: actor2, + Separation: separation, + } + var tar models.Actor + err := util.GetByURL(config.Actor1, &tar) + if err != nil { + log.Println(err) + os.Exit(1) + } + log.Println(tar) +} diff --git a/go.mod b/go.mod index 648bb79..a0077a5 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,9 @@ module github.com/karthikkalarikal/Qube go 1.23.5 + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/cobra v1.9.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ffae55e --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 7905807..5712602 100644 --- a/main.go +++ b/main.go @@ -1,5 +1,7 @@ package main -func main() { +import 
"github.com/karthikkalarikal/Qube/cmd" +func main() { + cmd.Execute() } diff --git a/models/model.go b/models/model.go new file mode 100644 index 0000000..d20730e --- /dev/null +++ b/models/model.go @@ -0,0 +1,20 @@ +package models + +type Config struct { + Actor1 string + Actor2 string + Separation uint +} + +type Movie struct { + Name string `json:"name"` + URL string `json:"url"` + Role string `json:"role"` +} + +type Actor struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Movies []Movie `json:"movies"` +} diff --git a/pkg/util/util.go b/pkg/util/util.go new file mode 100644 index 0000000..ab4dcf8 --- /dev/null +++ b/pkg/util/util.go @@ -0,0 +1,18 @@ +package util + +import ( + "encoding/json" + "log" + "net/http" +) + +// fetch resources +func GetByURL(url string, target any) error { + resp, err := http.Get("http://data.moviebuff.com/" + url) + if err != nil { + return err + } + defer resp.Body.Close() + log.Println(resp.Body) + return json.NewDecoder(resp.Body).Decode(target) +} From 66ce4b307db9819817882626aecc12db9c92045b Mon Sep 17 00:00:00 2001 From: karthik kalarikal Date: Thu, 20 Mar 2025 15:22:43 +0530 Subject: [PATCH 3/6] bfs search. 
--- cmd/root.go | 23 +++++---- go.mod | 6 ++- go.sum | 2 + models/model.go | 30 +++++++++-- pkg/traversal/bfs.go | 119 +++++++++++++++++++++++++++++++++++++++++++ pkg/util/util.go | 19 ++++++- pkg/util/validate.go | 25 +++++++++ 7 files changed, 207 insertions(+), 17 deletions(-) create mode 100644 pkg/traversal/bfs.go create mode 100644 pkg/util/validate.go diff --git a/cmd/root.go b/cmd/root.go index 8040a6a..c6583fe 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -6,6 +6,7 @@ import ( "os" "github.com/karthikkalarikal/Qube/models" + "github.com/karthikkalarikal/Qube/pkg/traversal" "github.com/karthikkalarikal/Qube/pkg/util" "github.com/spf13/cobra" ) @@ -33,21 +34,25 @@ func init() { rootCmd.PersistentFlags().StringVarP(&actor1, "actor1", "a", "", "name an actor/celeb") rootCmd.PersistentFlags().StringVarP(&actor2, "actor2", "b", "", "name an actor/celeb") - rootCmd.PersistentFlags().UintVarP(&separation, "separation", "s", 3, "separation between the actors/celebs") + // rootCmd.PersistentFlags().UintVarP(&separation, "separation", "s", 3, "separation between the actors/celebs") } func generate(_ *cobra.Command, args []string) { config := models.Config{ - Actor1: actor1, - Actor2: actor2, - Separation: separation, + Actor1: actor1, + Actor2: actor2, + // Separation: separation, } - var tar models.Actor - err := util.GetByURL(config.Actor1, &tar) - if err != nil { - log.Println(err) + + if err := util.Exists(config.Actor1); err != nil { + log.Printf("the name of the actor1: %s is incorrect %v", actor1, err) + os.Exit(1) + } + if err := util.Exists(config.Actor2); err != nil { + log.Printf("the name of the actor2: %s is incorrect %v", actor2, err) os.Exit(1) } - log.Println(tar) + + traversal.NewNode(config.Actor1, config.Actor2) } diff --git a/go.mod b/go.mod index a0077a5..90c16ba 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,12 @@ module github.com/karthikkalarikal/Qube go 1.23.5 +require ( + github.com/spf13/cobra v1.9.1 + golang.org/x/time v0.11.0 +) + require ( 
github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/spf13/cobra v1.9.1 // indirect github.com/spf13/pflag v1.0.6 // indirect ) diff --git a/go.sum b/go.sum index ffae55e..748bdbd 100644 --- a/go.sum +++ b/go.sum @@ -6,5 +6,7 @@ github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/models/model.go b/models/model.go index d20730e..8228391 100644 --- a/models/model.go +++ b/models/model.go @@ -6,15 +6,35 @@ type Config struct { Separation uint } -type Movie struct { +type MovieConn struct { Name string `json:"name"` URL string `json:"url"` Role string `json:"role"` } type Actor struct { - URL string `json:"url"` - Type string `json:"type"` - Name string `json:"name"` - Movies []Movie `json:"movies"` + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Movies []MovieConn `json:"movies"` +} + +type Cast struct { + URL string `json:"url"` + Name string `json:"name"` + Role string `json:"role"` +} + +type Crew struct { + Name string `json:"name"` + URL string `json:"url"` + Role string `json:"role"` +} + +type Movie struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Cast []Cast `json:"cast"` + Crew []Crew `json:"crew"` } diff --git a/pkg/traversal/bfs.go b/pkg/traversal/bfs.go new file mode 100644 index 0000000..bb256e1 --- /dev/null +++ b/pkg/traversal/bfs.go @@ 
-0,0 +1,119 @@ +package traversal + +import ( + "fmt" + "log" + + "github.com/karthikkalarikal/Qube/models" + "github.com/karthikkalarikal/Qube/pkg/util" +) + +type Node struct { + Entity string + Type string + From *Node + Link string +} + +func NewNode(start, target string) { + node := bfs(start, target) + printPath(node) +} + +func bfs(start, end string) *Node { + visited := make(map[string]bool) + queue := make([]Node, 1) + queue[0] = Node{Entity: start, Type: "person"} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + log.Println(current.Type) + if current.Entity == end { + return &current + } + visited[current.Entity] = true + + if current.Type == "person" { + var person models.Actor + + if err := util.GetByURL(current.Entity, &person); err != nil { + visited[current.Entity] = true + log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) + continue + } + + for _, credit := range person.Movies { + node := Node{ + Entity: credit.URL, + Type: "movie", + From: &current, + Link: "Movie: " + credit.Name + " (" + credit.Role + ")", + } + + if !visited[credit.URL] { + queue = append(queue, node) + } + } + } else { + var movie models.Movie + + if err := util.GetByURL(current.Entity, &movie); err != nil { + visited[current.Entity] = true + log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) + continue + } + + log.Println(movie.URL) + for _, cast := range movie.Cast { + + node := Node{ + Entity: cast.URL, + Type: "person", + From: &current, + Link: "Cast: " + cast.Name + " (" + cast.Role + ")", + } + if !visited[cast.URL] { + queue = append(queue, node) + } + + } + for _, crew := range movie.Crew { + + node := Node{ + Entity: crew.URL, + Type: "person", + From: &current, + Link: "Crew: " + crew.Name + " (" + crew.Role + ")", + } + + if !visited[crew.URL] { + queue = append(queue, node) + } + } + } + + } + return nil + +} + +func printPath(node *Node) { + if node == nil { + fmt.Println("No path found.") + return + } + + var path []*Node + 
for node != nil { + path = append([]*Node{node}, path...) + node = node.From + } + for i, n := range path { + if i == 0 { + fmt.Printf("%d. Start: %s\n", i+1, n.Entity) + } else { + fmt.Printf("%d. %s\n", i+1, n.Link) + } + } +} diff --git a/pkg/util/util.go b/pkg/util/util.go index ab4dcf8..9536d73 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -1,18 +1,33 @@ package util import ( + "context" "encoding/json" - "log" + "fmt" "net/http" + "time" + + "golang.org/x/time/rate" ) +var limiter = rate.NewLimiter(rate.Every(time.Second/5), 3) + // fetch resources func GetByURL(url string, target any) error { + + err := limiter.Wait(context.Background()) + if err != nil { + return err + } resp, err := http.Get("http://data.moviebuff.com/" + url) if err != nil { return err } defer resp.Body.Close() - log.Println(resp.Body) + if resp.StatusCode != http.StatusForbidden { + + return fmt.Errorf("access denied for URL: %s", url) + + } return json.NewDecoder(resp.Body).Decode(target) } diff --git a/pkg/util/validate.go b/pkg/util/validate.go new file mode 100644 index 0000000..8c5a1c5 --- /dev/null +++ b/pkg/util/validate.go @@ -0,0 +1,25 @@ +package util + +import ( + "context" + "fmt" + "net/http" +) + +func Exists(url string) error { + + err := limiter.Wait(context.Background()) + if err != nil { + return err + } + resp, err := http.Get("http://data.moviebuff.com/" + url) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + + return fmt.Errorf("unexpected status: %s", resp.Status) + } + return nil +} From 24b56f945258d9687c072866124bc54994d21f12 Mon Sep 17 00:00:00 2001 From: karthik kalarikal Date: Thu, 20 Mar 2025 16:27:21 +0530 Subject: [PATCH 4/6] adding concurrency for performance --- pkg/traversal/bfs.go | 150 +++++++++++++++++++++++++------------------ pkg/util/util.go | 2 +- 2 files changed, 87 insertions(+), 65 deletions(-) diff --git a/pkg/traversal/bfs.go b/pkg/traversal/bfs.go index bb256e1..184d716 100644 
--- a/pkg/traversal/bfs.go +++ b/pkg/traversal/bfs.go @@ -3,6 +3,7 @@ package traversal import ( "fmt" "log" + "sync" "github.com/karthikkalarikal/Qube/models" "github.com/karthikkalarikal/Qube/pkg/util" @@ -21,77 +22,98 @@ func NewNode(start, target string) { } func bfs(start, end string) *Node { - visited := make(map[string]bool) + var visited sync.Map + mu := sync.Mutex{} + wg := sync.WaitGroup{} + gofer := make(chan struct{}, 10) queue := make([]Node, 1) queue[0] = Node{Entity: start, Type: "person"} for len(queue) > 0 { - current := queue[0] - queue = queue[1:] - log.Println(current.Type) - if current.Entity == end { - return &current - } - visited[current.Entity] = true - - if current.Type == "person" { - var person models.Actor - - if err := util.GetByURL(current.Entity, &person); err != nil { - visited[current.Entity] = true - log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) - continue + l := len(queue) + for i := 0; i < l; i++ { + + current := queue[0] + queue = queue[1:] + log.Println(current.Entity, " ", current.Type) + if current.Entity == end { + return &current } - - for _, credit := range person.Movies { - node := Node{ - Entity: credit.URL, - Type: "movie", - From: &current, - Link: "Movie: " + credit.Name + " (" + credit.Role + ")", - } - - if !visited[credit.URL] { - queue = append(queue, node) + visited.Store(current.Entity, true) + wg.Add(1) + gofer <- struct{}{} + go func(current Node) { + defer func() { + wg.Done() + <-gofer + }() + if current.Type == "person" { + var person models.Actor + + if err := util.GetByURL(current.Entity, &person); err != nil { + visited.Store(current.Entity, true) + log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) + + } + + for _, credit := range person.Movies { + node := Node{ + Entity: credit.URL, + Type: "movie", + From: &current, + Link: "Movie: " + credit.Name + " (" + credit.Role + ")", + } + + if _, ok := visited.Load(credit.URL); !ok { + mu.Lock() + queue = append(queue, node) + 
mu.Unlock() + } + } + } else { + var movie models.Movie + + if err := util.GetByURL(current.Entity, &movie); err != nil { + visited.Store(current.Entity, true) + log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) + + } + + log.Println(movie.URL) + for _, cast := range movie.Cast { + + node := Node{ + Entity: cast.URL, + Type: "person", + From: &current, + Link: "Cast: " + cast.Name + " (" + cast.Role + ")", + } + if _, ok := visited.Load(cast.URL); !ok { + mu.Lock() + queue = append(queue, node) + mu.Unlock() + } + + } + for _, crew := range movie.Crew { + + node := Node{ + Entity: crew.URL, + Type: "person", + From: &current, + Link: "Crew: " + crew.Name + " (" + crew.Role + ")", + } + + if _, ok := visited.Load(crew.URL); !ok { + mu.Lock() + queue = append(queue, node) + mu.Unlock() + } + } } - } - } else { - var movie models.Movie - - if err := util.GetByURL(current.Entity, &movie); err != nil { - visited[current.Entity] = true - log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) - continue - } - - log.Println(movie.URL) - for _, cast := range movie.Cast { - - node := Node{ - Entity: cast.URL, - Type: "person", - From: &current, - Link: "Cast: " + cast.Name + " (" + cast.Role + ")", - } - if !visited[cast.URL] { - queue = append(queue, node) - } - - } - for _, crew := range movie.Crew { - - node := Node{ - Entity: crew.URL, - Type: "person", - From: &current, - Link: "Crew: " + crew.Name + " (" + crew.Role + ")", - } - - if !visited[crew.URL] { - queue = append(queue, node) - } - } + }(current) } + wg.Wait() } return nil diff --git a/pkg/util/util.go b/pkg/util/util.go index 9536d73..72ef5a0 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -24,7 +24,7 @@ func GetByURL(url string, target any) error { return err } defer resp.Body.Close() - if resp.StatusCode != http.StatusForbidden { + if resp.StatusCode == http.StatusForbidden { return fmt.Errorf("access denied for URL: %s", url) From 11d5458896bb89ed33754443c02d03658607d3f3 Mon Sep 17 
00:00:00 2001 From: karthik kalarikal Date: Fri, 21 Mar 2025 21:01:02 +0530 Subject: [PATCH 5/6] modified the code for better performance, --- pkg/traversal/bfs.go | 158 +++++++++++++++++++++++++++---------------- pkg/util/util.go | 2 +- 2 files changed, 99 insertions(+), 61 deletions(-) diff --git a/pkg/traversal/bfs.go b/pkg/traversal/bfs.go index 184d716..d106a3f 100644 --- a/pkg/traversal/bfs.go +++ b/pkg/traversal/bfs.go @@ -1,6 +1,7 @@ package traversal import ( + "context" "fmt" "log" "sync" @@ -23,101 +24,138 @@ func NewNode(start, target string) { func bfs(start, end string) *Node { var visited sync.Map - mu := sync.Mutex{} - wg := sync.WaitGroup{} - gofer := make(chan struct{}, 10) - queue := make([]Node, 1) - queue[0] = Node{Entity: start, Type: "person"} - - for len(queue) > 0 { - l := len(queue) - for i := 0; i < l; i++ { - - current := queue[0] - queue = queue[1:] - log.Println(current.Entity, " ", current.Type) - if current.Entity == end { - return &current - } - visited.Store(current.Entity, true) - wg.Add(1) - gofer <- struct{}{} - go func(current Node) { - defer func() { - wg.Done() - <-gofer - }() - if current.Type == "person" { - var person models.Actor + var once sync.Once + var wg sync.WaitGroup - if err := util.GetByURL(current.Entity, &person); err != nil { - visited.Store(current.Entity, true) - log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + type safeQueue struct { + mu sync.Mutex + cond *sync.Cond + items []Node + } + + queue := &safeQueue{items: make([]Node, 0)} + queue.cond = sync.NewCond(&queue.mu) + result := make(chan *Node, 1) + + // Initialize queue with start node + queue.mu.Lock() + queue.items = append(queue.items, Node{Entity: start, Type: "person"}) + queue.cond.Signal() + queue.mu.Unlock() + + worker := func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + queue.mu.Lock() + // Wait for work or 
cancellation + for len(queue.items) == 0 { + if ctx.Err() != nil { + queue.mu.Unlock() + return } + queue.cond.Wait() + } + // Dequeue node + current := queue.items[0] + queue.items = queue.items[1:] + queue.mu.Unlock() + + // Skip if already processed + if _, loaded := visited.LoadOrStore(current.Entity, true); loaded { + continue + } + // Early termination check + if current.Entity == end { + once.Do(func() { + result <- &current + cancel() + }) + return + } + + // Process node + if current.Type == "person" { + var person models.Actor + if err := util.GetByURL(current.Entity, &person); err != nil { + // Unmark visited to allow retries + log.Printf("Retryable error on %s: %v", current.Entity, err) + continue + } + // Enqueue children without visited checks for _, credit := range person.Movies { - node := Node{ + child := Node{ Entity: credit.URL, Type: "movie", From: &current, Link: "Movie: " + credit.Name + " (" + credit.Role + ")", } - - if _, ok := visited.Load(credit.URL); !ok { - mu.Lock() - queue = append(queue, node) - mu.Unlock() - } + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() } } else { var movie models.Movie - if err := util.GetByURL(current.Entity, &movie); err != nil { - visited.Store(current.Entity, true) - log.Printf("Skipping inaccessible node: %s (%v)", current.Entity, err) - + log.Printf("Retryable error on %s: %v", current.Entity, err) //since some of the links are not accessible, the retry logic is removed. 
+ continue } - log.Println(movie.URL) for _, cast := range movie.Cast { - - node := Node{ + child := Node{ Entity: cast.URL, Type: "person", From: &current, Link: "Cast: " + cast.Name + " (" + cast.Role + ")", } - if _, ok := visited.Load(cast.URL); !ok { - mu.Lock() - queue = append(queue, node) - mu.Unlock() - } - + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() } for _, crew := range movie.Crew { - - node := Node{ + child := Node{ Entity: crew.URL, Type: "person", From: &current, Link: "Crew: " + crew.Name + " (" + crew.Role + ")", } - - if _, ok := visited.Load(crew.URL); !ok { - mu.Lock() - queue = append(queue, node) - mu.Unlock() - } + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() } } - }(current) + } } - wg.Wait() + } + // Start workers + numWorkers := 30 + wg.Add(numWorkers) + for i := 0; i < numWorkers; i++ { + go worker() } - return nil + go func() { + wg.Wait() + once.Do(func() { close(result) }) + }() + + select { + case res := <-result: + return res + case <-ctx.Done(): + return nil + } } func printPath(node *Node) { diff --git a/pkg/util/util.go b/pkg/util/util.go index 72ef5a0..efb437b 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -10,7 +10,7 @@ import ( "golang.org/x/time/rate" ) -var limiter = rate.NewLimiter(rate.Every(1*time.Millisecond), 1000) +var limiter = rate.NewLimiter(rate.Every(1*time.Millisecond), 1000) // fetch resources func GetByURL(url string, target any) error { From eb8ff189c4034bba50af5e8eb6dd288500d2bfbb Mon Sep 17 00:00:00 2001 From: karthik kalarikal Date: Sat, 22 Mar 2025 19:43:01 +0530 Subject: [PATCH 6/6] readme --- README.md | 52 -------------------------------------------- Readme.md | 21 ++++++++++++++++++ pkg/traversal/bfs.go | 17 +++++++++++---- 3 files changed, 34 insertions(+), 56 deletions(-) delete mode 100644 README.md create mode 100644 Readme.md diff --git a/README.md b/README.md deleted file mode 100644 index 
6df56a5..0000000 --- a/README.md +++ /dev/null @@ -1,52 +0,0 @@ -#Degrees of Separation - -With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. - -Write a Go program that behaves the following way: - -``` -$ degrees amitabh-bachchan robert-de-niro - -Degrees of Separation: 3 - -1. Movie: The Great Gatsby -Supporting Actor: Amitabh Bachchan -Actor: Leonardo DiCaprio - -2. Movie: The Wolf of Wall Street -Actor: Leonardo DiCaprio -Director: Martin Scorsese - -3. Movie: Taxi Driver -Director: Martin Scorsese -Actor: Robert De Niro -``` - -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. -All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. - -Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` - -To solve the example above, your solution would fetch at least the following: - -http://data.moviebuff.com/amitabh-bachchan - -http://data.moviebuff.com/the-great-gatsby - -http://data.moviebuff.com/leonardo-dicaprio - -http://data.moviebuff.com/the-wolf-of-wall-street - -http://data.moviebuff.com/martin-scorsese - -http://data.moviebuff.com/taxi-driver - -##Notes -* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) -* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. 
- -Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc. - -To submit a solution, fork this repo and send a Pull Request on Github. - -For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..5d0bfb5 --- /dev/null +++ b/Readme.md @@ -0,0 +1,21 @@ +# Degrees of Separation CLI + +A Go CLI application that calculates the shortest path between two actors/movie professionals using data from [Moviebuff](https://www.moviebuff.com/). The solution uses concurrent BFS (Breadth-First Search) to efficiently find connections through movies and crew members. 
+ +## Features + +- **Concurrent BFS implementation in Graph** with worker pooling +- **Rate limiting** to handle API throttling (currently 1000 requests per second with a 1000 as burst) +- **Smart retry mechanism** for failed requests (some entries are not accessible) +- **Path reconstruction** showing movie connections +- **Efficient memory management** with sync.Map and compact storage + +## Installation + +```go build -o degrees``` + +```./degrees ``` + +```./degrees -a amitabh-bachchan -b robert-de-niro``` + + diff --git a/pkg/traversal/bfs.go b/pkg/traversal/bfs.go index d106a3f..82d6852 100644 --- a/pkg/traversal/bfs.go +++ b/pkg/traversal/bfs.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log" + "strings" "sync" "github.com/karthikkalarikal/Qube/models" @@ -40,7 +41,6 @@ func bfs(start, end string) *Node { queue.cond = sync.NewCond(&queue.mu) result := make(chan *Node, 1) - // Initialize queue with start node queue.mu.Lock() queue.items = append(queue.items, Node{Entity: start, Type: "person"}) queue.cond.Signal() @@ -85,11 +85,13 @@ func bfs(start, end string) *Node { if current.Type == "person" { var person models.Actor if err := util.GetByURL(current.Entity, &person); err != nil { - // Unmark visited to allow retries + if checkErrorForbidden(err) { + visited.Delete(current.Entity) + } log.Printf("Retryable error on %s: %v", current.Entity, err) continue } - // Enqueue children without visited checks + for _, credit := range person.Movies { child := Node{ Entity: credit.URL, @@ -105,7 +107,10 @@ func bfs(start, end string) *Node { } else { var movie models.Movie if err := util.GetByURL(current.Entity, &movie); err != nil { - log.Printf("Retryable error on %s: %v", current.Entity, err) //since some of the links are not accessible, the retry logic is removed. 
+ if checkErrorForbidden(err) { + visited.Delete(current.Entity) + } + log.Printf("Retryable error on %s: %v", current.Entity, err) //since some of the links are not accessible, the retry logic is specific. continue } @@ -177,3 +182,7 @@ func printPath(node *Node) { } } } + +func checkErrorForbidden(err error) bool { + return strings.Contains(err.Error(), "access denied for URL:") +}