-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 2dbddc0
Showing
11 changed files
with
832 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
# Build the Docker image and publish it to the GitHub Container Registry
# (ghcr.io) whenever a version tag (v*) is pushed.
name: Create and publish a Docker image to Github Packages

on:
  push:
    tags:
      - v*

env:
  REGISTRY: ghcr.io
  # Image path on ghcr.io mirrors the repository name (owner/repo).
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write  # required to push to GitHub Packages / ghcr.io
      attestations: write
      id-token: write
    steps:
      # QEMU + Buildx enable cross-platform builds (currently linux/amd64 only).
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Checkout repository
        uses: actions/checkout@v4

      # The docker/* actions below are pinned to commit SHAs (supply-chain hardening).
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Derives image tags/labels from the pushed git tag.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          push: true
          # Persist layer cache in the GitHub Actions cache between runs.
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
# Python bytecode
__pycache__
*.pyc
# tool caches and local virtualenv
.cache
.venv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
FROM python:3.12.4-slim-bookworm

# Target platform (e.g. linux/amd64, linux/arm/v7), injected by buildx.
ARG TARGETPLATFORM

ENV POETRY_VERSION=1.8.3
ENV POETRY_HOME=/opt/poetry
ENV POETRY_VENV=/opt/poetry-venv
ENV POETRY_CACHE_DIR=/opt/.cache

# update + install + cleanup in ONE layer: a separate `apt-get update` layer
# can be cached stale and break later installs, and removing the apt lists
# keeps the image smaller.
RUN apt-get update && \
    apt-get install -qq -y --fix-missing --no-install-recommends \
    build-essential \
    gcc \
    openssl \
    libffi-dev \
    libssl-dev \
    pkg-config \
    curl && \
    rm -rf /var/lib/apt/lists/*

# On armv7 some dependencies ship no prebuilt wheels, so a Rust toolchain is
# installed to compile them from source on that platform only.
RUN if [ "$TARGETPLATFORM" = "linux/arm/v7" ] ; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sed 's#/proc/self/exe#\/bin\/sh#g' | sh -s -- -y && \
    . $HOME/.cargo/env; \
    fi

ENV PATH="/root/.cargo/bin:${PATH}"

# Install Poetry into its own venv, isolated from the application deps.
RUN python3 -m venv $POETRY_VENV \
    && $POETRY_VENV/bin/pip install -U pip setuptools \
    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}

ENV PATH="${PATH}:${POETRY_VENV}/bin"

WORKDIR /app/

# Copy only the dependency manifests first so the (slow) install layer stays
# cached until pyproject.toml / poetry.lock actually change.
COPY ./pyproject.toml ./poetry.lock* /app/

ARG INSTALL_DEV=false
RUN sh -c "if [ $INSTALL_DEV = true ] ; then poetry install --no-root ; else poetry install --no-root --only main ; fi"

# `ENV key=value` form — the legacy space-separated form is deprecated.
ENV PYTHONPATH="/app/scrappey_proxy"

COPY ./scrappey_proxy /app/scrappey_proxy

# Same port FlareSolverr uses, so this is a drop-in replacement.
EXPOSE 8191

CMD ["poetry", "run", "gunicorn", "main:app", "--bind", "0.0.0.0:8191", "--log-level", "debug", "--timeout", "600"]
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
[tool.poetry]
name = "scrappey_proxy"
version = "0.1.0"
description = "Flaresolverr substitute using Scrappey.com"
authors = ["Anthony RAFFY <anthony.raffy38@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
# Exact pin matches the python:3.12.4-slim-bookworm Docker base image.
python = "3.12.4"
flask = "^3.0.3"
scrappeycom = "^0.3.8"  # Scrappey.com API client
gunicorn = "^22.0.0"    # production WSGI server (see Dockerfile CMD)

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import logging
from typing import Callable

from scrappey_proxy import utils

# Status strings used in FlareSolverr-compatible API responses.
STATUS_OK = "ok"
STATUS_ERROR = "error"

# Reuse gunicorn's error logger so messages land in the server's log stream.
logger = logging.getLogger("gunicorn.error")
|
||
|
||
class ChallengeResolutionResultT: | ||
url: str | None = None | ||
status: int | None = None | ||
headers: dict | None = None | ||
response: str | None = None | ||
cookies: list | None = None | ||
userAgent: str | None = None | ||
|
||
def __init__(self, _dict): | ||
self.__dict__.update(_dict) | ||
|
||
|
||
class ChallengeResolutionT:
    """Outcome of a challenge-resolution attempt: a status/message pair
    plus an optional result payload."""

    status: str | None = None
    message: str | None = None
    result: ChallengeResolutionResultT | None = None

    def __init__(self, _dict):
        for key, value in _dict.items():
            setattr(self, key, value)
        # Promote a raw result dict into its typed wrapper.
        if self.result is not None:
            self.result = ChallengeResolutionResultT(self.result)
|
||
|
||
class V1RequestBase(object): | ||
cmd: str | None = None | ||
cookies: list | None = None | ||
maxTimeout: int | None = None | ||
proxy: dict | None = None | ||
session: str | None = None | ||
session_ttl_minutes: int | None = None | ||
headers: list | None = None # deprecated v2.0.0, not used | ||
userAgent: str | None = None # deprecated v2.0.0, not used | ||
# V1Request | ||
url: str | None = None | ||
postData: str | None = None | ||
returnOnlyCookies: bool | None = None | ||
download: bool | None = None # deprecated v2.0.0, not used | ||
returnRawHtml: bool | None = None # deprecated v2.0.0, not used | ||
|
||
def __init__(self, _dict): | ||
self.__dict__.update(_dict) | ||
|
||
|
||
class V1ResponseBase(object):
    """Outgoing FlareSolverr v1 API response body."""

    # V1ResponseBase
    status: str | None = None
    message: str | None = None
    session: str | None = None
    sessions: list[str] | None = None
    startTimestamp: int | None = None
    endTimestamp: int | None = None
    version: str | None = None
    # V1ResponseSolution
    solution: ChallengeResolutionResultT | None = None
    # hidden vars
    __error_500__: bool = False

    def __init__(self, _dict):
        for key, value in _dict.items():
            setattr(self, key, value)
        # Promote a raw solution dict into its typed wrapper.
        if self.solution is not None:
            self.solution = ChallengeResolutionResultT(self.solution)
|
||
|
||
def controller_v1_logic(
    req: V1RequestBase, get_handler: Callable[[V1RequestBase], V1ResponseBase]
) -> V1ResponseBase:
    """Validate a v1 request and dispatch it to the matching command handler.

    Only the "request.get" command is supported; any other value of
    ``req.cmd`` (or a missing one) raises, which the caller turns into an
    error response.

    Raises:
        Exception: when ``cmd`` is missing or unrecognized.
        ValueError: when ``maxTimeout`` is not parseable as an integer.
    """
    if req.cmd is None:
        raise Exception("Request parameter 'cmd' is mandatory.")

    # Normalize maxTimeout (milliseconds) to a positive int so downstream
    # code can rely on the type; JSON clients may send it as a numeric
    # string, which the original code left un-coerced.
    if req.maxTimeout is None or int(req.maxTimeout) < 1:
        req.maxTimeout = 60000
    else:
        req.maxTimeout = int(req.maxTimeout)

    res: V1ResponseBase
    if req.cmd == "request.get":
        res = get_handler(req)
    else:
        raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")

    return res
|
||
|
||
def controller_v1_handler(
    req: V1RequestBase, get_handler: Callable[[V1RequestBase], V1ResponseBase]
):
    """Top-level /v1 boundary: run the controller logic and convert any
    exception into a FlareSolverr-style error response instead of a 500
    with no body."""
    logger.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}")
    try:
        result = controller_v1_logic(req, get_handler)
    except Exception as exc:
        # Broad catch is deliberate here: this is the API boundary.
        result = V1ResponseBase({})
        result.__error_500__ = True
        result.status = STATUS_ERROR
        result.message = f"Error: {exc}"

    result.version = "3.0.0"
    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import logging
import os
from typing import Dict, List

import requests
from flask import Flask, request

from scrappey_proxy import utils
from scrappey_proxy.flaresolverr import (
    STATUS_OK,
    ChallengeResolutionResultT,
    ChallengeResolutionT,
    V1RequestBase,
    V1ResponseBase,
    controller_v1_handler,
)
from scrappey_proxy.scrappey import ScrappeyResponse, get_scrappey

# Reuse gunicorn's error logger so messages land in the server's log stream.
logger = logging.getLogger("gunicorn.error")

# Proxy credentials are optional (default empty); host/port variables are
# mandatory — a missing one raises KeyError at import time (fail fast).
PROXY_USERNAME = os.environ.get("PROXY_USERNAME", "")
PROXY_PASSWORD = os.environ.get("PROXY_PASSWORD", "")
PROXY_INTERNAL_IP = os.environ["PROXY_INTERNAL_IP"]
PROXY_EXTERNAL_IP = os.environ["PROXY_EXTERNAL_IP"]
PROXY_INTERNAL_PORT = os.environ["PROXY_INTERNAL_PORT"]
PROXY_EXTERNAL_PORT = os.environ["PROXY_EXTERNAL_PORT"]

# Cookies and User-Agent captured from the last Scrappey solve, replayed on
# later plain requests. NOTE(review): module-level mutable state — presumably
# assumes a single-process deployment with one target site; confirm.
saved_cookies = []
saved_headers = {}

# Local forward proxy used for the plain (non-Scrappey) probe requests.
proxy = f"{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_INTERNAL_IP}:{PROXY_INTERNAL_PORT}"
proxies = {"http": proxy, "https": proxy}

app = Flask(__name__)
|
||
|
||
def save_cookies(cookies: List[Dict[str, str]]):
    """Remember the cookies from the latest Scrappey solve for reuse on
    subsequent plain requests."""
    global saved_cookies
    saved_cookies = cookies
|
||
|
||
def save_user_agent(user_agent: str):
    """Remember the solver's User-Agent so follow-up requests present the
    same browser identity the cookies were issued to."""
    global saved_headers
    saved_headers["User-Agent"] = user_agent
|
||
|
||
def get_sendable_cookies():
    """Flatten the saved browser-style cookie dicts into the plain
    name -> value mapping that ``requests`` accepts."""
    return {cookie["name"]: cookie["value"] for cookie in saved_cookies}
|
||
|
||
def cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
    """Handle a FlareSolverr ``request.get`` command.

    Strategy: first issue a plain proxied GET with any previously-saved
    cookies and User-Agent; only when Cloudflare is detected fall back to
    the Scrappey solver, caching the resulting cookies/User-Agent for
    later requests.

    Raises:
        Exception: when ``req.url`` is missing (converted to an error
            response by ``controller_v1_handler``).
    """
    if not req.url:
        raise Exception("Request URL should be present")

    challenge_res_result = ChallengeResolutionResultT({})
    challenge_res_result.url = req.url
    challenge_res_result.status = 200

    challenge_res = ChallengeResolutionT({})
    challenge_res.status = STATUS_OK

    logger.info(f"Simple request to {req.url} to check for cloudflare")
    logger.debug(f"Using {proxies} as local proxy")
    # req.maxTimeout is milliseconds; without an explicit timeout the
    # original call could hang a gunicorn worker forever.
    timeout_s = (int(req.maxTimeout) / 1000) if req.maxTimeout else 60
    basic_req = requests.get(
        req.url,
        cookies=get_sendable_cookies(),
        headers=saved_headers,
        proxies=proxies,
        timeout=timeout_s,
    )

    if utils.detect_cloudflare(basic_req):
        logger.info("Detected Cloudflare")
        scrappey_res: ScrappeyResponse = get_scrappey(req)
        challenge_res_result.cookies = scrappey_res.cookies
        challenge_res.message = "Challenge solved!"
        challenge_res_result.headers = {}
        challenge_res_result.response = scrappey_res.response
        challenge_res_result.userAgent = scrappey_res.user_agent
        # Cache the solve artifacts so the next request can skip Scrappey.
        save_cookies(scrappey_res.cookies)
        save_user_agent(scrappey_res.user_agent)
    else:
        logger.info("Cloudflare not detected or cf_clearance cookie still valid")
        challenge_res.message = (
            "Cloudflare not detected or cf_clearance cookie still valid!"
        )
        logger.debug(basic_req.text)
        challenge_res_result.headers = {}
        challenge_res_result.cookies = saved_cookies
        challenge_res_result.response = basic_req.text
        challenge_res_result.userAgent = saved_headers.get("User-Agent", "")

    challenge_res.result = challenge_res_result

    res = V1ResponseBase({})
    res.status = challenge_res.status
    res.message = challenge_res.message
    res.solution = challenge_res.result
    return res
|
||
|
||
@app.post("/v1")
def controller_v1_endpoint():
    """FlareSolverr-compatible /v1 endpoint: parse the JSON body, dispatch
    to the controller, and serialize the response object back to a dict."""
    body = V1RequestBase(request.json)
    response = controller_v1_handler(body, cmd_request_get)
    return utils.object_to_dict(response)
|
||
|
||
# When run under gunicorn the module is imported (not executed directly), so
# forward Flask's app logger to gunicorn's handlers and level to get a
# single, consistent log stream.
if __name__ != "__main__":
    gunicorn_logger = logging.getLogger("gunicorn.error")
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import logging
import os
from dataclasses import dataclass
from typing import Dict, List

from scrappeycom.scrappey import Scrappey

from scrappey_proxy.flaresolverr import V1RequestBase

# Reuse gunicorn's error logger so messages land in the server's log stream.
logger = logging.getLogger("gunicorn.error")

# Externally-reachable proxy handed to Scrappey so its solver browses from
# the same egress IP as our plain requests. Credentials optional; host/port
# mandatory (KeyError at import time when unset — fail fast).
PROXY_USERNAME = os.environ.get("PROXY_USERNAME", "")
PROXY_PASSWORD = os.environ.get("PROXY_PASSWORD", "")
PROXY_EXTERNAL_IP = os.environ["PROXY_EXTERNAL_IP"]
PROXY_EXTERNAL_PORT = os.environ["PROXY_EXTERNAL_PORT"]

proxy_url = f"http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_EXTERNAL_IP}:{PROXY_EXTERNAL_PORT}"
|
||
|
||
@dataclass
class ScrappeyResponse:
    """Normalized result of a Scrappey solve attempt."""

    response: str  # page body returned by the solver ("" on failure)
    status_code: int  # 200 on success, 500 when no usable solution came back
    cookies: List[Dict[str, str]]  # browser-style cookie dicts from the solve
    user_agent: str  # User-Agent the solver browsed with
|
||
|
||
# Module-level client; SCRAPPEY_API_KEY is mandatory (fail fast at import).
scrappey = Scrappey(os.environ["SCRAPPEY_API_KEY"])


# Function which takes a request and forwards it to scrappey
def get_scrappey(request: V1RequestBase) -> ScrappeyResponse:
    """Forward ``request.url`` to the Scrappey solver and normalize the reply.

    Returns an empty ScrappeyResponse with status_code 500 when Scrappey
    does not return a usable solution.
    """
    logger.info(f"Calling scrappey for URL : {request.url}")
    logger.debug(f"Using {proxy_url} as scrappey's proxy")

    get_request_result = scrappey.get({"url": request.url, "proxy": proxy_url})

    solution = get_request_result.get("solution")
    if not solution or "response" not in solution:
        return ScrappeyResponse("", 500, [], "")

    # "cookies"/"userAgent" are not guaranteed to accompany "response";
    # default them instead of raising KeyError mid-solve.
    return ScrappeyResponse(
        solution["response"],
        200,
        solution.get("cookies", []),
        solution.get("userAgent", ""),
    )
Oops, something went wrong.