When I tried repurposing this code I got a similar error, so that portion in my code now looks like this:
# GraphQL query template for fetching posts, newest view.
# It carries three %-style placeholders, filled in order by `return_data`:
#   after  (%s) - start of the date window, formatted as YYYY-MM-DD
#   before (%s) - end of the date window, formatted as YYYY-MM-DD
#   limit  (%d) - maximum number of posts requested for that window
query = """
{
posts(input: {
terms: {
view: "new"
after: "%s"
before: "%s"
limit: %d
meta: null
}
}) {
results {
_id
title
postedAt
postedAtFormatted
user {
username
displayName
}
wordCount
voteCount
baseScore
commentCount
tags {
name
}
}
}
}
"""
def send_graphql_request(input_query, timeout=120):
    """POST a GraphQL query and return the decoded JSON response.

    Args:
        input_query: The GraphQL query text to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely, so one stalled
            connection would hang a crawl that issues many requests. Pass
            ``None`` to restore the unbounded wait.

    Returns:
        The response body parsed from JSON (for a GraphQL endpoint this is
        normally a dict).

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # `url` and `headers` are module-level settings defined elsewhere.
    r = requests.post(
        url,
        json={"query": input_query},
        headers=headers,
        timeout=timeout,
    )
    # The HTTP status is deliberately not checked here: the body is parsed
    # and returned whatever the status code, exactly as before.
    return json.loads(r.text)
def return_data(
    input_query,
    start_date="1960-01-01",
    end_date="2019-01-01",
    delta=None,
    limit=3000,
    stop_date=None,
    send_request=None,
):
    """Fetch posts one date window at a time and return them as a DataFrame.

    The first window runs from ``start_date`` to ``end_date``. Every later
    window starts where the previous one ended and is ``delta`` long. Windows
    are requested until the window start reaches ``stop_date``.

    Args:
        input_query: Query template with three %-placeholders, filled in
            order with the window start (YYYY-MM-DD), the window end
            (YYYY-MM-DD) and ``limit``.
        start_date: Start of the first window (anything ``pd.Timestamp``
            accepts).
        end_date: End of the first window.
        delta: Length of every later window; defaults to four months.
        limit: Maximum number of posts requested per window.
        stop_date: Iteration stops once the window start reaches this date;
            defaults to the current time.
        send_request: Callable taking the formatted query and returning the
            decoded response; defaults to ``send_graphql_request``.

    Returns:
        A DataFrame with one row per post. If a ``postedAt`` column is
        present it is converted to datetimes. The frame is empty when no
        posts were returned.

    Raises:
        ValueError: If ``delta`` does not move the window forward, which
            would otherwise loop forever.
        KeyError, TypeError: If a response lacks the
            ``data -> posts -> results`` structure (for example, an error
            payload).
    """
    import warnings

    window_start = pd.Timestamp(start_date)
    window_end = pd.Timestamp(end_date)
    step = pd.DateOffset(months=4) if delta is None else delta
    stop = pd.Timestamp.now() if stop_date is None else pd.Timestamp(stop_date)
    if send_request is None:
        send_request = send_graphql_request

    if not window_end + step > window_end:
        raise ValueError("delta must move the window forward in time")

    rows = []
    while window_start < stop:
        # Progress output: one line per requested window.
        print(window_start)
        query = input_query % (
            window_start.strftime("%Y-%m-%d"),
            window_end.strftime("%Y-%m-%d"),
            limit,
        )
        response = send_request(query)
        results = response["data"]["posts"]["results"]
        if len(results) >= limit:
            # A full page suggests the window held more posts than `limit`;
            # the surplus would otherwise be dropped without any notice.
            warnings.warn(
                "Window %s to %s returned %d posts (limit %d); results may "
                "be truncated. Use a smaller delta or a larger limit."
                % (window_start, window_end, len(results), limit),
                stacklevel=2,
            )
        rows.extend(results)
        window_start = window_end
        window_end = window_end + step

    df = pd.DataFrame(rows)
    # With no rows, or a query that does not select postedAt, the column is
    # absent and indexing it would raise KeyError.
    if "postedAt" in df.columns:
        df["postedAt"] = pd.to_datetime(df["postedAt"])
    return df
So a limit of ~3000 per request seemed to be a large enough chunk size to get every post, and I step through consecutive date ranges (passed as date strings) to get all posts in postedAt order.
When I tried repurposing this code I got a similar error, so that portion in my code now looks like this:
So a limit of ~3000 per request seemed to be a large enough chunk size to get every post, and I step through consecutive date ranges (passed as date strings) to get all posts in postedAt order.
Thanks! I tried extending this for comments, but it looks like it ignores terms; filed an issue.