Skip to content

Commit

Permalink
debug
Browse files Browse the repository at this point in the history
  • Loading branch information
zihanxiao23 committed Dec 9, 2024
1 parent b521351 commit a321cc4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
19 changes: 15 additions & 4 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
from pydantic import BaseModel
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, count
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType

# Initialize FastAPI
app = FastAPI()

# Initialize SparkSession
spark = SparkSession.builder.appName("microservice").getOrCreate()
spark = SparkSession.builder \
.appName("microservice") \
.master("local[*]") \
.getOrCreate()

# Reduce Spark log verbosity
spark.sparkContext.setLogLevel("WARN")
Expand All @@ -18,14 +22,21 @@ class Data(BaseModel):
gender: str
salary: float

# Define schema for Spark DataFrame
schema = StructType([
StructField("id", IntegerType(), True),
StructField("gender", StringType(), True),
StructField("salary", FloatType(), True)
])

@app.post("/process")
async def process_data(data: list[Data]):
"""
Receive JSON data stream and return analysis results
"""
try:
# Convert input data to Spark DataFrame
df = spark.createDataFrame([d.dict() for d in data])
# Convert input data to Spark DataFrame with schema
df = spark.createDataFrame([d.dict() for d in data], schema=schema)
# Perform simple data analysis
result = df.groupBy("gender").agg(
avg("salary").alias("average_salary"),
Expand All @@ -35,4 +46,4 @@ async def process_data(data: list[Data]):
result_json = [{"gender": row["gender"], "average_salary": row["average_salary"], "count": row["count"]} for row in result]
return result_json
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(e))
6 changes: 4 additions & 2 deletions src/load_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from locust import HttpUser, task, between
import random

class LoadTest(HttpUser):
wait_time = between(1, 2)

@task
def process_data(self):
data = [{"id": 1, "gender": "M", "salary": 5000}]
self.client.post("/process", json=data, timeout=10)
# Generate a random batch of data
data = [{"id": i, "gender": random.choice(["M", "F"]), "salary": random.uniform(3000, 10000)} for i in range(100)]
self.client.post("/process", json=data, timeout=20)

0 comments on commit a321cc4

Please sign in to comment.