Skip to content

Commit

Permalink
Merge pull request #4 from dadosjusbr/corrigindo-acesso
Browse files Browse the repository at this point in the history
atualizando url e adicionando user-agent e downloadTimeout
  • Loading branch information
danielfireman authored Feb 27, 2024
2 parents a39a112 + ff88caf commit bfe5135
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 9 deletions.
16 changes: 9 additions & 7 deletions crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
type crawler struct {
collectionTimeout time.Duration
timeBetweenSteps time.Duration
downloadTimeout time.Duration
year string
month string
output string
Expand All @@ -34,6 +35,7 @@ func (c crawler) crawl() ([]string, error) {
chromedp.Flag("headless", true), // mude para false para executar com navegador visível.
chromedp.NoSandbox,
chromedp.DisableGPU,
chromedp.UserAgent("Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"),
)...,
)
defer allocCancel()
Expand Down Expand Up @@ -108,11 +110,11 @@ func (c crawler) abreCaixaDialogo(ctx context.Context, tipo string) error {
var baseURL string
var selectYear string
if tipo == "contra" {
baseURL = "http://www.mpap.mp.br/transparencia/index.php?pg=consulta_folha_membros_ativos"
selectYear = `//select[@id="ano"]`
baseURL = "https://portal.mpap.mp.br/transparencia/index.php?pg=consulta_folha_membros_ativos"
selectYear = `//*[@id="ano"]`
} else {
baseURL = "https://www.mpap.mp.br/transparencia/index.php?pg=consulta_verbas_indenizatorias"
selectYear = `//select[@id="ano_verbas"]`
baseURL = "https://portal.mpap.mp.br/transparencia/index.php?pg=consulta_verbas_indenizatorias"
selectYear = `//*[@id="ano_verbas"]`
}

return chromedp.Run(ctx,
Expand All @@ -124,7 +126,7 @@ func (c crawler) abreCaixaDialogo(ctx context.Context, tipo string) error {
chromedp.Sleep(c.timeBetweenSteps),

// Seleciona mes
chromedp.SetValue(`//select[@id="mes"]`, strings.TrimPrefix(c.month, "0"), chromedp.BySearch, chromedp.NodeVisible),
chromedp.SetValue(`//*[@id="mes"]`, strings.TrimPrefix(c.month, "0"), chromedp.BySearch, chromedp.NodeVisible),
chromedp.Sleep(c.timeBetweenSteps),

// Busca
Expand All @@ -144,13 +146,13 @@ func (c crawler) exportaPlanilha(ctx context.Context, fName string) error {
chromedp.Run(ctx,
// Clica no botão de download
chromedp.Click(`/html/body/div[1]/center/div/fieldset/div[3]/form/button[2]`, chromedp.BySearch, chromedp.NodeVisible),
chromedp.Sleep(c.timeBetweenSteps),
chromedp.Sleep(c.downloadTimeout),
)
} else {
chromedp.Run(ctx,
// Clica no botão de download
chromedp.Click(`/html/body/div[1]/center/div/fieldset/div/form/button[2]`, chromedp.BySearch, chromedp.NodeVisible),
chromedp.Sleep(c.timeBetweenSteps),
chromedp.Sleep(c.downloadTimeout),
)
}

Expand Down
14 changes: 12 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (

const (
defaultGeneralTimeout = 4 * time.Minute // Maximum total duration for collecting all files. Default value derived from an average of runs, ~4.5min.
defaulTimeBetweenSteps = 5 * time.Second // Wait time between crawler steps.
defaulTimeBetweenSteps = 5 * time.Second // Wait time between crawler steps.
defaultFileDownloadTimeout = 20 * time.Second // How long the crawler should wait for each file download to finish; used when DOWNLOAD_TIMEOUT is not set.
)

func main() {
Expand Down Expand Up @@ -51,9 +52,18 @@ func main() {
log.Fatalf("Invalid TIME_BETWEEN_STEPS (\"%s\"): %q", os.Getenv("TIME_BETWEEN_STEPS"), err)
}
}
downloadTimeout := defaultFileDownloadTimeout
if os.Getenv("DOWNLOAD_TIMEOUT") != "" {
var err error
downloadTimeout, err = time.ParseDuration(os.Getenv("DOWNLOAD_TIMEOUT"))
if err != nil {
log.Fatalf("Invalid DOWNLOAD_TIMEOUT (\"%s\"): %q", os.Getenv("DOWNLOAD_TIMEOUT"), err)
}
}
c := crawler{
collectionTimeout: generalTimeout,
timeBetweenSteps: timeBetweenSteps,
downloadTimeout: downloadTimeout,
year: year,
month: month,
output: outputFolder,
Expand All @@ -66,4 +76,4 @@ func main() {
// O parser do MPAP espera os arquivos separados por \n. Mudanças aqui têm que
// refletir as expectativas lá.
fmt.Println(strings.Join(downloads, "\n"))
}
}

0 comments on commit bfe5135

Please sign in to comment.