Skip to content

Commit

Permalink
🧪 Added scrapper image as another service of the docker-compose. Note…
Browse files Browse the repository at this point in the history
…: selenium doesn't work as expected
  • Loading branch information
lhbelfanti committed Sep 5, 2024
1 parent 7fe8a1a commit be3d6f6
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 37 deletions.
3 changes: 0 additions & 3 deletions .env.example

This file was deleted.

24 changes: 20 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

---


# AHBCC: Adverse Human Behaviors Corpus Creator

Adverse Human Behaviors is a term created to encompass all types of human behaviors that affect one or more individuals in physical, psychological, or emotional ways.
Expand Down Expand Up @@ -94,15 +93,32 @@ erDiagram
}
```

#### Necessary files to start the database
### Run

```
docker compose -f build/compose.yml up --build
```

#### Necessary files to run the app

To connect to the database create a `.env` file in the root of the project or rename the provided [.env.example](.env.example).
To connect to the database, create a `.env` file inside the `build` folder of the project, or rename the provided [.env.example](build/.env.example) to `.env`.

This file should contain the following environment variables:

1. For this app (this repository):
```
# App settings
APP_EXPOSED_PORT=<AHBCC Host Port>
APP_INTERNAL_PORT=<AHBCC Container Port>
# Database
DB_NAME=<Database name>
DB_USER=<Database username>
DB_PASS=<Database password>
DB_PORT=<Database port>
```

Replace each `< ... >` placeholder with the correct value. For example: `DB_NAME=<Database name>` --> `DB_NAME=ahbcc`.

2. For the env variables of the scrapper app, please check the [documentation of its repository](https://github.com/lhbelfanti/goxcrap?tab=readme-ov-file#setup).

They must also be added to the same `.env` file.
41 changes: 41 additions & 0 deletions build/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# ---------- AHBCC ----------

# App settings
APP_EXPOSED_PORT=4100 # Host port
APP_INTERNAL_PORT=4000 # Container port

# Database
DB_NAME=ahbcc
DB_USER=ahbcc_user
DB_PASS=ahbcc_password
DB_PORT=5432

# ---------- GoXCrap ----------

# App settings
SCRAPPER_EXPOSED_PORT=5100 # Host port
SCRAPPER_INTERNAL_PORT=5000 # Container port

# Scrapper settings
SCRAPPER_EMAIL=<Twitter account email>
SCRAPPER_USERNAME=<Twitter username>
SCRAPPER_PASSWORD=<Twitter password>
BROKER_CONCURRENT_MESSAGES=2
SCRAPPER_LOGIN_PAGE_TIMEOUT=10
SCRAPPER_LOGIN_ELEMENTS_TIMEOUT=10
SCRAPPER_LOGIN_PASSWORD_TIMEOUT=5
SCRAPPER_WAIT_TIME_AFTER_LOGIN=10
SCRAPPER_SEARCH_PAGE_TIMEOUT=10
SCRAPPER_ARTICLES_TIMEOUT=10

# Selenium Chrome driver paths
SELENIUM_DRIVER_PATH=/usr/bin/chromedriver
SELENIUM_BROWSER_PATH=/usr/bin/chromium

# RabbitMQ settings
RABBITMQ_USER=goxcrap_user
RABBITMQ_PASS=goxcrap_password
RABBITMQ_PORT=5672

# External APIs URLs
SAVE_TWEETS_API_URL=http://localhost:4100
7 changes: 4 additions & 3 deletions Dockerfile_app → build/Dockerfile_app
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ FROM golang:1.22.3-alpine
LABEL maintainer="Lucas Belfanti"

# Install necessary dependencies
RUN apk update && apk add --no-cache curl tzdata \
RUN apk update && apk add --no-cache \
curl \
tzdata \
&& rm -rf /var/cache/apk/*

# Set timezone
Expand All @@ -20,13 +22,12 @@ RUN go mod download
COPY cmd/ ./cmd
COPY internal/ ./internal
COPY migrations/ ./migrations
COPY .env ./

# Build the application and output the binary as 'ahbcc'
RUN CGO_ENABLED=0 GOOS=linux go build -o /ahbcc ./cmd/api

# Expose port
EXPOSE 8090
EXPOSE ${API_PORT}

# Run application
CMD [ "/ahbcc" ]
Expand Down
6 changes: 4 additions & 2 deletions Dockerfile_migrations → build/Dockerfile_migrations
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ FROM alpine:latest
LABEL maintainer="Lucas Belfanti"

# Install necessary dependencies
RUN apk update && apk add --no-cache curl tzdata \
RUN apk update && apk add --no-cache \
curl \
tzdata \
&& rm -rf /var/cache/apk/*

# Set timezone
ENV TZ=America/Argentina/Buenos_Aires

# Make HTTP request to /migrations/run/v1 endpoint
CMD ["sh", "-c", "curl -X POST http://app:8090/migrations/run/v1"]
CMD ["sh", "-c", "curl -X POST http://ahbcc:${API_PORT}/migrations/run/v1"]

66 changes: 45 additions & 21 deletions docker-compose.yml → build/compose.yml
Original file line number Diff line number Diff line change
@@ -1,65 +1,89 @@
services:
app:
corpus_creator:
build:
context: .
dockerfile: Dockerfile_app
container_name: app
context: ../
dockerfile: build/Dockerfile_app
container_name: ahbcc
ports:
- "${APP_EXPOSED_PORT}:${APP_INTERNAL_PORT}"
environment:
API_PORT: ${APP_INTERNAL_PORT:-4001}
POSTGRES_DB_NAME: ${DB_NAME}
POSTGRES_DB_USER: ${DB_USER}
POSTGRES_DB_PASS: ${DB_PASS}
ports:
- 8080:8090
restart: on-failure
POSTGRES_DB_PORT: ${DB_PORT}
env_file:
- .env
volumes:
- .:/app
- ../:/app
depends_on:
postgres_db:
condition: service_healthy
networks:
- network
- corpus_creator
- scrapper
restart: on-failure
healthcheck:
test: ["CMD-SHELL", "sh -c 'curl -sSf http://localhost:8090/ping/v1 || exit 1'"]
test: ["CMD-SHELL", "sh -c 'curl -sSf http://localhost:${APP_INTERNAL_PORT}/ping/v1 || exit 1'"]
interval: 5s
timeout: 10s
retries: 5

migrations:
image: alpine:latest
build:
context: .
dockerfile: Dockerfile_migrations
context: ../
dockerfile: build/Dockerfile_migrations
container_name: migrations
restart: "no"
environment:
API_PORT: ${APP_INTERNAL_PORT:-4001}
env_file:
- .env
depends_on:
app:
corpus_creator:
condition: service_healthy
networks:
- network
- corpus_creator
restart: "no"

postgres_db:
image: postgres:latest
container_name: postgres
ports:
- 1234:5432
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASS}
TZ: "America/Argentina/Buenos_Aires"
ports:
- 1234:5432
env_file:
- .env
volumes:
- database:/var/lib/postgresql/data
- postgres_data:/var/lib/postgresql/data
networks:
- network
- corpus_creator
healthcheck:
test: ["CMD-SHELL", "sh -c 'pg_isready -U \"$DB_USER\" -d \"$DB_NAME\"'"]
interval: 10s
timeout: 10s
retries: 5

scrapper:
extends:
file: scrapper/compose.yml
service: scrapper

rabbitmq:
extends:
file: scrapper/compose.yml
service: rabbitmq

volumes:
database:
postgres_data:
rabbitmq_data:

networks:
network:
corpus_creator:
driver: bridge
scrapper:
driver: bridge
63 changes: 63 additions & 0 deletions build/scrapper/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
services:
scrapper:
image: lhbelfanti/goxcrap:1.1.0
container_name: goxcrap
command: ["/goxcrap", "--prod"]
ports:
- "${SCRAPPER_EXPOSED_PORT}:${SCRAPPER_INTERNAL_PORT}"
environment:
API_PORT: ${SCRAPPER_INTERNAL_PORT:-5001}
EMAIL: ${SCRAPPER_EMAIL}
PASSWORD: ${SCRAPPER_PASSWORD}
USERNAME: ${SCRAPPER_USERNAME}
DRIVER_PATH: ${SELENIUM_DRIVER_PATH}
BROWSER_PATH: ${SELENIUM_BROWSER_PATH}
BROKER_CONCURRENT_MESSAGES: ${BROKER_CONCURRENT_MESSAGES}
LOGIN_PAGE_TIMEOUT: ${SCRAPPER_LOGIN_PAGE_TIMEOUT}
LOGIN_ELEMENTS_TIMEOUT: ${SCRAPPER_LOGIN_ELEMENTS_TIMEOUT}
LOGIN_PASSWORD_TIMEOUT: ${SCRAPPER_LOGIN_PASSWORD_TIMEOUT}
WAIT_TIME_AFTER_LOGIN: ${SCRAPPER_WAIT_TIME_AFTER_LOGIN}
SEARCH_PAGE_TIMEOUT: ${SCRAPPER_SEARCH_PAGE_TIMEOUT}
ARTICLES_TIMEOUT: ${SCRAPPER_ARTICLES_TIMEOUT}
RABBITMQ_USER: ${RABBITMQ_USER}
RABBITMQ_PASS: ${RABBITMQ_PASS}
RABBITMQ_PORT: ${RABBITMQ_PORT:-5672}
env_file:
- ../.env
volumes:
- .:/scrapper
depends_on:
rabbitmq:
condition: service_healthy
networks:
- scrapper
- corpus_creator
restart: on-failure
healthcheck:
test: [ "CMD-SHELL", "sh -c 'curl -sSf http://localhost:${SCRAPPER_INTERNAL_PORT}/ping/v1 || exit 1'" ]
interval: 5s
timeout: 10s
retries: 5

rabbitmq:
image: "rabbitmq:3-management-alpine"
container_name: rabbitmq
hostname: 'goxcrap'
ports:
- "5672:${RABBITMQ_PORT:-5672}"
- 15672:15672
environment:
RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER}
RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASS}
env_file:
- ../.env
volumes:
- rabbitmq_data:/var/lib/rabbitmq
networks:
- scrapper
healthcheck:
test: [ "CMD-SHELL", "rabbitmq-diagnostics -q check_running && rabbitmq-diagnostics -q check_local_alarms" ]
start_period: 1m
interval: 10s
timeout: 10s
retries: 5
7 changes: 5 additions & 2 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package main

import (
"fmt"
"log"
"log/slog"
"net/http"
"os"

"ahbcc/cmd/api/migrations"
"ahbcc/cmd/api/ping"
Expand Down Expand Up @@ -36,8 +38,9 @@ func main() {
router.HandleFunc("POST /tweets/v1", tweets.InsertHandlerV1(insertTweets))

/* --- Server --- */
slog.Info("AHBCC server is ready to receive request on port :8090")
err := http.ListenAndServe(":8090", router)
port := fmt.Sprintf(":%s", os.Getenv("API_PORT"))
slog.Info(fmt.Sprintf("AHBCC server is ready to receive request on port %s", port))
err := http.ListenAndServe(port, router)
if err != nil {
log.Fatalf("Could not start server: %s\n", err.Error())
}
Expand Down
5 changes: 3 additions & 2 deletions internal/database/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ var (
pgOnce sync.Once
)

const databaseURL string = "postgresql://%s:%s@postgres_db:5432/%s?sslmode=disable"
const databaseURL string = "postgresql://%s:%s@postgres_db:%s/%s?sslmode=disable"

// resolveDatabaseURL builds the PostgreSQL connection string by filling the
// databaseURL template with values read from the POSTGRES_DB_USER,
// POSTGRES_DB_PASS, POSTGRES_DB_NAME and POSTGRES_DB_PORT environment
// variables. The values are used as returned by os.Getenv, without validation
// or escaping, so an unset variable contributes an empty string to the URL.
func resolveDatabaseURL() string {
dbUser := os.Getenv("POSTGRES_DB_USER")
dbPass := os.Getenv("POSTGRES_DB_PASS")
dbName := os.Getenv("POSTGRES_DB_NAME")
dbPort := os.Getenv("POSTGRES_DB_PORT")

// NOTE(review): two return statements appear here because this view is a
// rendered diff; presumably the first (three arguments) is the removed line
// and the second (four arguments, including dbPort) is its replacement —
// confirm against the actual file.
return fmt.Sprintf(databaseURL, dbUser, dbPass, dbName)
return fmt.Sprintf(databaseURL, dbUser, dbPass, dbPort, dbName)
}

0 comments on commit be3d6f6

Please sign in to comment.