Skip to content

Commit

Permalink
beginnings of a neo4j driver
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcannan committed Jul 31, 2023
1 parent 513ea43 commit 7b101ce
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 18 deletions.
11 changes: 11 additions & 0 deletions articlesa/neo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ WITH article
UNWIND $authors AS author_name
MERGE (author:Author {name: author_name})
MERGE (article)-[:AUTHORED_BY]->(author)
MERGE (publisher:Publisher {netloc: $publisher})
MERGE (article)-[:PUBLISHED_BY]->(publisher)
```

#### Get article that matches url
Expand All @@ -40,3 +42,12 @@ MERGE (article)-[:AUTHORED_BY]->(author)
MATCH (article:Article {url: $url})
RETURN article
```

#### Get counts of all articles, authors, and publishers

```cypher
// Count each label on its own; chaining three MATCH clauses would build a
// cartesian product and inflate every count.
MATCH (article:Article)
WITH COUNT(article) AS articleCount
// OPTIONAL MATCH keeps the row alive when a label has no nodes yet.
OPTIONAL MATCH (author:Author)
WITH articleCount, COUNT(author) AS authorCount
OPTIONAL MATCH (publisher:Publisher)
RETURN articleCount, authorCount, COUNT(publisher) AS publisherCount
```
52 changes: 47 additions & 5 deletions articlesa/neo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@

import os

from neo4j import AsyncGraphDatabase, AsyncDriver
from neo4j import AsyncGraphDatabase, AsyncDriver, EagerResult

from articlesa.types import ParsedArticle

class Neo4JDriver():

class Neo4JArticleDriver():
"""
Neo4JDriver is an async context manager for interacting with the neo4j database.
Neo4JArticleDriver is an async context manager for interacting with the neo4j database.
You can do a lot of things with it, I promise.
All reads and writes to the database happen within this context manager.
"""
_driver: AsyncDriver
uri: str = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

async def __aenter__(self) -> 'Neo4JDriver':
async def __aenter__(self) -> 'Neo4JArticleDriver':
"""Enter the async context manager and return the driver."""
async with AsyncGraphDatabase.driver(self.uri) as driver:
self._driver = driver
Expand All @@ -28,3 +30,43 @@ async def __aenter__(self) -> 'Neo4JDriver':
async def __aexit__(self, exc_type, exc_value, traceback) -> None: # noqa
"""Exit the async context manager."""
await self._driver.close()

async def get_stats(self) -> EagerResult:
    """
    Get quick stats describing the database.

    Runs a single query that returns one record with the fields
    ``articleCount``, ``authorCount`` and ``publisherCount``.
    """
    # Each label is counted in its own stage. Chaining three plain MATCH
    # clauses would build the cartesian product of the three node sets, so
    # every count would be articles * authors * publishers (and all three
    # would be 0 as soon as one label had no nodes).
    # OPTIONAL MATCH keeps the carried row alive when a label is empty;
    # COUNT ignores the resulting null and reports 0.
    query = """\
        MATCH (article:Article)
        WITH COUNT(article) AS articleCount
        OPTIONAL MATCH (author:Author)
        WITH articleCount, COUNT(author) AS authorCount
        OPTIONAL MATCH (publisher:Publisher)
        RETURN articleCount, authorCount, COUNT(publisher) AS publisherCount
        """
    return await self._driver.execute_query(query)

async def put_article(self, parsed_article: ParsedArticle) -> None:
    """
    Put a parsed article into the database.

    Merges the Article node keyed by ``url`` and sets its title, links,
    published and parsedAtUtc properties. Also merges the Publisher node
    (keyed by netloc) and one Author node per entry in
    ``parsed_article.authors``, linking each of them to the article.
    """
    # The publisher MERGEs are placed before UNWIND on purpose: UNWIND over an
    # empty $authors list produces zero rows, so any clause after it is skipped
    # for articles without authors. Running them first also avoids repeating
    # the publisher MERGE once per author.
    query = """\
        MERGE (article:Article {url: $url})
        SET article.title = $title,
            article.links = $links,
            article.published = $published,
            article.parsedAtUtc = $parsedAtUtc
        MERGE (publisher:Publisher {netloc: $publisher})
        MERGE (article)-[:PUBLISHED_BY]->(publisher)
        WITH article
        UNWIND $authors AS author_name
        MERGE (author:Author {name: author_name})
        MERGE (article)-[:AUTHORED_BY]->(author)
        """
    await self._driver.execute_query(
        query,
        url=parsed_article.url,
        title=parsed_article.title,
        links=parsed_article.links,
        published=parsed_article.published,
        parsedAtUtc=parsed_article.parsedAtUtc,
        authors=parsed_article.authors,
        publisher=parsed_article.publisher,
    )
26 changes: 14 additions & 12 deletions articlesa/neo/__main__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
"""Entrypoint for random neo4j tasks/tests."""
import asyncio
from pprint import pprint

from articlesa.neo import Neo4JDriver
from articlesa.neo import Neo4JArticleDriver

article_urls = [
"https://apnews.com/article/washington-virginia-maryland-loud-boom-crash-military-jet-biden-joint-base-andrews-7116356c23f2ade0d6c842159e261f1b",
"https://www.thegatewaypundit.com/2019/11/revealed-adam-schiff-connected-to-both-companies-named-in-7-4-billion-burisma-us-ukraine-corruption-case/",
]

async def main():
query = '''\
CREATE (article:FakeArticle {
title: "Your Article Title",
content: "Lorem ipsum dolor sit amet, consectetur adipiscing elit...",
published_date: "2023-07-25"
})
'''
async with Neo4JDriver() as driver:
response = await driver._driver.execute_query(query)
pprint(response.summary.__dict__)

async def main() -> None:
    """Parse the first sample article, store it, and print the stats query summary."""
    # Imported here rather than at module top, as in the original.
    from articlesa.worker.parse import ParsedArticle, parse_article

    async with Neo4JArticleDriver() as neo:
        raw_article = await parse_article(article_urls[0])
        parsed = ParsedArticle.parse_obj(raw_article)
        await neo.put_article(parsed)
        stats = await neo.get_stats()
        summary_fields = stats.summary.__dict__
        pprint(summary_fields)  # noqa: T203


# Only run when executed as a script (e.g. ``python -m articlesa.neo``), so
# importing this module does not trigger a database write.
if __name__ == "__main__":
    asyncio.run(main())
7 changes: 6 additions & 1 deletion articlesa/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ class ParsedArticle(BaseModel):
text: str
authors: list[str]
links: list[str]
published: str
published: str # isoformat
parsedAtUtc: datetime
urlhash: Optional[str] = None
depth: Optional[int] = None

@property
def publisher(self) -> str:
    """Return the network location (netloc) part of this article's url."""
    parsed_url = urlparse(self.url)
    return parsed_url.netloc


class StreamEvent(Enum):
"""SSE event types."""
Expand Down

0 comments on commit 7b101ce

Please sign in to comment.