feat: add go-sdk

2024-08-04 17:14:55 +08:00
parent 789c6cf5d7
commit 1378ffc138
10 changed files with 1218 additions and 0 deletions
@@ -0,0 +1,580 @@
+// Package firecrawl provides a client for interacting with the Firecrawl API.
+package firecrawl
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+	"os"
+	"time"
+)
+
+// FirecrawlDocumentMetadata represents metadata for a Firecrawl document
+type FirecrawlDocumentMetadata struct {
+	Title             string   `json:"title,omitempty"`
+	Description       string   `json:"description,omitempty"`
+	Language          string   `json:"language,omitempty"`
+	Keywords          string   `json:"keywords,omitempty"`
+	Robots            string   `json:"robots,omitempty"`
+	OGTitle           string   `json:"ogTitle,omitempty"`
+	OGDescription     string   `json:"ogDescription,omitempty"`
+	OGURL             string   `json:"ogUrl,omitempty"`
+	OGImage           string   `json:"ogImage,omitempty"`
+	OGAudio           string   `json:"ogAudio,omitempty"`
+	OGDeterminer      string   `json:"ogDeterminer,omitempty"`
+	OGLocale          string   `json:"ogLocale,omitempty"`
+	OGLocaleAlternate []string `json:"ogLocaleAlternate,omitempty"`
+	OGSiteName        string   `json:"ogSiteName,omitempty"`
+	OGVideo           string   `json:"ogVideo,omitempty"`
+	DCTermsCreated    string   `json:"dctermsCreated,omitempty"`
+	DCDateCreated     string   `json:"dcDateCreated,omitempty"`
+	DCDate            string   `json:"dcDate,omitempty"`
+	DCTermsType       string   `json:"dctermsType,omitempty"`
+	DCType            string   `json:"dcType,omitempty"`
+	DCTermsAudience   string   `json:"dctermsAudience,omitempty"`
+	DCTermsSubject    string   `json:"dctermsSubject,omitempty"`
+	DCSubject         string   `json:"dcSubject,omitempty"`
+	DCDescription     string   `json:"dcDescription,omitempty"`
+	DCTermsKeywords   string   `json:"dctermsKeywords,omitempty"`
+	ModifiedTime      string   `json:"modifiedTime,omitempty"`
+	PublishedTime     string   `json:"publishedTime,omitempty"`
+	ArticleTag        string   `json:"articleTag,omitempty"`
+	ArticleSection    string   `json:"articleSection,omitempty"`
+	SourceURL         string   `json:"sourceURL,omitempty"`
+	PageStatusCode    int      `json:"pageStatusCode,omitempty"`
+	PageError         string   `json:"pageError,omitempty"`
+}
+
+// FirecrawlDocument represents a document in Firecrawl
+type FirecrawlDocument struct {
+	ID            string                     `json:"id,omitempty"`
+	URL           string                     `json:"url,omitempty"`
+	Content       string                     `json:"content"`
+	Markdown      string                     `json:"markdown,omitempty"`
+	HTML          string                     `json:"html,omitempty"`
+	LLMExtraction map[string]any             `json:"llm_extraction,omitempty"`
+	CreatedAt     *time.Time                 `json:"createdAt,omitempty"`
+	UpdatedAt     *time.Time                 `json:"updatedAt,omitempty"`
+	Type          string                     `json:"type,omitempty"`
+	Metadata      *FirecrawlDocumentMetadata `json:"metadata,omitempty"`
+	ChildrenLinks []string                   `json:"childrenLinks,omitempty"`
+	Provider      string                     `json:"provider,omitempty"`
+	Warning       string                     `json:"warning,omitempty"`
+	Index         int                        `json:"index,omitempty"`
+}
+
+// ExtractorOptions represents options for extraction.
+type ExtractorOptions struct {
+	Mode             string `json:"mode,omitempty"`
+	ExtractionPrompt string `json:"extractionPrompt,omitempty"`
+	ExtractionSchema any    `json:"extractionSchema,omitempty"`
+}
+
+// ScrapeResponse represents the response for scraping operations
+type ScrapeResponse struct {
+	Success bool               `json:"success"`
+	Data    *FirecrawlDocument `json:"data,omitempty"`
+}
+
+// SearchResponse represents the response for searching operations
+type SearchResponse struct {
+	Success bool                 `json:"success"`
+	Data    []*FirecrawlDocument `json:"data,omitempty"`
+}
+
+// CrawlResponse represents the response for crawling operations
+type CrawlResponse struct {
+	Success bool                 `json:"success"`
+	JobID   string               `json:"jobId,omitempty"`
+	Data    []*FirecrawlDocument `json:"data,omitempty"`
+}
+
+// JobStatusResponse represents the response for checking crawl job status
+type JobStatusResponse struct {
+	Success     bool                 `json:"success"`
+	Status      string               `json:"status"`
+	Current     int                  `json:"current,omitempty"`
+	CurrentURL  string               `json:"current_url,omitempty"`
+	CurrentStep string               `json:"current_step,omitempty"`
+	Total       int                  `json:"total,omitempty"`
+	JobID       string               `json:"jobId,omitempty"`
+	Data        []*FirecrawlDocument `json:"data,omitempty"`
+	PartialData []*FirecrawlDocument `json:"partial_data,omitempty"`
+}
+
+// CancelCrawlJobResponse represents the response for canceling a crawl job
+type CancelCrawlJobResponse struct {
+	Success bool   `json:"success"`
+	Status  string `json:"status"`
+}
+
+// requestOptions represents options for making requests.
+type requestOptions struct {
+	retries int
+	backoff int
+}
+
+// requestOption is a functional option type for requestOptions.
+type requestOption func(*requestOptions)
+
+// newRequestOptions creates a new requestOptions instance with the provided options.
+//
+// Parameters:
+//   - opts: Optional request options.
+//
+// Returns:
+//   - *requestOptions: A new instance of requestOptions with the provided options.
+func newRequestOptions(opts ...requestOption) *requestOptions {
+	options := &requestOptions{retries: 1}
+	for _, opt := range opts {
+		opt(options)
+	}
+	return options
+}
+
+// withRetries sets the number of retries for a request.
+//
+// Parameters:
+//   - retries: The number of retries to be performed.
+//
+// Returns:
+//   - requestOption: A functional option that sets the number of retries for a request.
+func withRetries(retries int) requestOption {
+	return func(opts *requestOptions) {
+		opts.retries = retries
+	}
+}
+
+// withBackoff sets the backoff interval for a request.
+//
+// Parameters:
+//   - backoff: The backoff interval (in milliseconds) to be used for retries.
+//
+// Returns:
+//   - requestOption: A functional option that sets the backoff interval for a request.
+func withBackoff(backoff int) requestOption {
+	return func(opts *requestOptions) {
+		opts.backoff = backoff
+	}
+}
+
+// FirecrawlApp represents a client for the Firecrawl API.
+type FirecrawlApp struct {
+	APIKey string
+	APIURL string
+	Client *http.Client
+}
+
+// NewFirecrawlApp creates a new instance of FirecrawlApp with the provided API key and API URL.
+// If the API key or API URL is not provided, it attempts to retrieve them from environment variables.
+// If the API key is still not found, it returns an error.
+//
+// Parameters:
+//   - apiKey: The API key for authenticating with the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_KEY environment variable.
+//   - apiURL: The base URL for the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_URL environment variable, defaulting to "https://api.firecrawl.dev".
+//
+// Returns:
+//   - *FirecrawlApp: A new instance of FirecrawlApp configured with the provided or retrieved API key and API URL.
+//   - error: An error if the API key is not provided or retrieved.
+func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) {
+	if apiKey == "" {
+		apiKey = os.Getenv("FIRECRAWL_API_KEY")
+		if apiKey == "" {
+			return nil, fmt.Errorf("no API key provided")
+		}
+	}
+
+	if apiURL == "" {
+		apiURL = os.Getenv("FIRECRAWL_API_URL")
+		if apiURL == "" {
+			apiURL = "https://api.firecrawl.dev"
+		}
+	}
+
+	client := &http.Client{
+		Timeout: 30 * time.Second,
+	}
+
+	return &FirecrawlApp{
+		APIKey: apiKey,
+		APIURL: apiURL,
+		Client: client,
+	}, nil
+}
+
+// ScrapeURL scrapes the content of the specified URL using the Firecrawl API.
+//
+// Parameters:
+//   - url: The URL to be scraped.
+//   - params: Optional parameters for the scrape request, including extractor options for LLM extraction.
+//
+// Returns:
+//   - *FirecrawlDocument: The scraped document data.
+//   - error: An error if the scrape request fails.
+func (app *FirecrawlApp) ScrapeURL(url string, params map[string]any) (*FirecrawlDocument, error) {
+	headers := app.prepareHeaders("")
+	scrapeBody := map[string]any{"url": url}
+
+	if params != nil {
+		if extractorOptions, ok := params["extractorOptions"].(ExtractorOptions); ok {
+			if schema, ok := extractorOptions.ExtractionSchema.(interface{ schema() any }); ok {
+				extractorOptions.ExtractionSchema = schema.schema()
+			}
+			if extractorOptions.Mode == "" {
+				extractorOptions.Mode = "llm-extraction"
+			}
+			scrapeBody["extractorOptions"] = extractorOptions
+		}
+
+		for key, value := range params {
+			if key != "extractorOptions" {
+				scrapeBody[key] = value
+			}
+		}
+	}
+
+	resp, err := app.makeRequest(
+		http.MethodPost,
+		fmt.Sprintf("%s/v0/scrape", app.APIURL),
+		scrapeBody,
+		headers,
+		"scrape URL",
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var scrapeResponse ScrapeResponse
+	err = json.Unmarshal(resp, &scrapeResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if scrapeResponse.Success {
+		return scrapeResponse.Data, nil
+	}
+
+	return nil, fmt.Errorf("failed to scrape URL")
+}
+
+// Search performs a search query using the Firecrawl API and returns the search results.
+//
+// Parameters:
+//   - query: The search query string.
+//   - params: Optional parameters for the search request.
+//
+// Returns:
+//   - []*FirecrawlDocument: A slice of FirecrawlDocument containing the search results.
+//   - error: An error if the search request fails.
+func (app *FirecrawlApp) Search(query string, params map[string]any) ([]*FirecrawlDocument, error) {
+	headers := app.prepareHeaders("")
+	searchBody := map[string]any{"query": query}
+	for k, v := range params {
+		searchBody[k] = v
+	}
+
+	resp, err := app.makeRequest(
+		http.MethodPost,
+		fmt.Sprintf("%s/v0/search", app.APIURL),
+		searchBody,
+		headers,
+		"search",
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var searchResponse SearchResponse
+	err = json.Unmarshal(resp, &searchResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if searchResponse.Success {
+		return searchResponse.Data, nil
+	}
+
+	return nil, fmt.Errorf("failed to search")
+}
+
+// CrawlURL starts a crawl job for the specified URL using the Firecrawl API.
+//
+// Parameters:
+//   - url: The URL to crawl.
+//   - params: Optional parameters for the crawl request.
+//   - waitUntilDone: If true, the method will wait until the crawl job is completed before returning.
+//   - pollInterval: The interval (in seconds) at which to poll the job status if waitUntilDone is true.
+//   - idempotencyKey: An optional idempotency key to ensure the request is idempotent.
+//
+// Returns:
+//   - any: The job ID if waitUntilDone is false, or the crawl result if waitUntilDone is true.
+//   - error: An error if the crawl request fails.
+func (app *FirecrawlApp) CrawlURL(url string, params map[string]any, waitUntilDone bool, pollInterval int, idempotencyKey string) (any, error) {
+	headers := app.prepareHeaders(idempotencyKey)
+	crawlBody := map[string]any{"url": url}
+	for k, v := range params {
+		crawlBody[k] = v
+	}
+
+	resp, err := app.makeRequest(
+		http.MethodPost,
+		fmt.Sprintf("%s/v0/crawl", app.APIURL),
+		crawlBody,
+		headers,
+		"start crawl job",
+		withRetries(3),
+		withBackoff(500),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var crawlResponse CrawlResponse
+	err = json.Unmarshal(resp, &crawlResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if waitUntilDone {
+		return app.monitorJobStatus(crawlResponse.JobID, headers, pollInterval)
+	}
+
+	if crawlResponse.JobID == "" {
+		return nil, fmt.Errorf("failed to get job ID")
+	}
+
+	return crawlResponse.JobID, nil
+}
+
+// CheckCrawlStatus checks the status of a crawl job using the Firecrawl API.
+//
+// Parameters:
+//   - jobID: The ID of the crawl job to check.
+//
+// Returns:
+//   - *JobStatusResponse: The status of the crawl job.
+//   - error: An error if the crawl status check request fails.
+func (app *FirecrawlApp) CheckCrawlStatus(jobID string) (*JobStatusResponse, error) {
+	headers := app.prepareHeaders("")
+	resp, err := app.makeRequest(
+		http.MethodGet,
+		fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID),
+		nil,
+		headers,
+		"check crawl status",
+		withRetries(3),
+		withBackoff(500),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var jobStatusResponse JobStatusResponse
+	err = json.Unmarshal(resp, &jobStatusResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	return &jobStatusResponse, nil
+}
+
+// CancelCrawlJob cancels a crawl job using the Firecrawl API.
+//
+// Parameters:
+//   - jobID: The ID of the crawl job to cancel.
+//
+// Returns:
+//   - string: The status of the crawl job after cancellation.
+//   - error: An error if the crawl job cancellation request fails.
+func (app *FirecrawlApp) CancelCrawlJob(jobID string) (string, error) {
+	headers := app.prepareHeaders("")
+	resp, err := app.makeRequest(
+		http.MethodDelete,
+		fmt.Sprintf("%s/v0/crawl/cancel/%s", app.APIURL, jobID),
+		nil,
+		headers,
+		"cancel crawl job",
+	)
+	if err != nil {
+		return "", err
+	}
+
+	var cancelCrawlJobResponse CancelCrawlJobResponse
+	err = json.Unmarshal(resp, &cancelCrawlJobResponse)
+	if err != nil {
+		return "", err
+	}
+
+	return cancelCrawlJobResponse.Status, nil
+}
+
+// prepareHeaders prepares the headers for an HTTP request.
+//
+// Parameters:
+//   - idempotencyKey: A string representing the idempotency key to be included in the headers.
+//     If the idempotency key is an empty string, it will not be included in the headers.
+//
+// Returns:
+//   - map[string]string: A map containing the headers for the HTTP request.
+func (app *FirecrawlApp) prepareHeaders(idempotencyKey string) map[string]string {
+	headers := map[string]string{
+		"Content-Type":  "application/json",
+		"Authorization": fmt.Sprintf("Bearer %s", app.APIKey),
+	}
+	if idempotencyKey != "" {
+		headers["x-idempotency-key"] = idempotencyKey
+	}
+	return headers
+}
+
+// makeRequest makes a request to the specified URL with the provided method, data, headers, and options.
+//
+// Parameters:
+//   - method: The HTTP method to use for the request (e.g., "GET", "POST", "DELETE").
+//   - url: The URL to send the request to.
+//   - data: The data to be sent in the request body.
+//   - headers: The headers to be included in the request.
+//   - action: A string describing the action being performed.
+//   - opts: Optional request options.
+//
+// Returns:
+//   - []byte: The response body from the request.
+//   - error: An error if the request fails.
+func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, headers map[string]string, action string, opts ...requestOption) ([]byte, error) {
+	var body []byte
+	var err error
+	if data != nil {
+		body, err = json.Marshal(data)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	req, err := http.NewRequest(method, url, bytes.NewBuffer(body))
+	if err != nil {
+		return nil, err
+	}
+
+	for key, value := range headers {
+		req.Header.Set(key, value)
+	}
+
+	var resp *http.Response
+	options := newRequestOptions(opts...)
+	for i := 0; i < options.retries; i++ {
+		resp, err = app.Client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		defer resp.Body.Close()
+
+		if resp.StatusCode != 502 {
+			break
+		}
+
+		time.Sleep(time.Duration(math.Pow(2, float64(i))) * time.Duration(options.backoff) * time.Millisecond)
+	}
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	statusCode := resp.StatusCode
+	if statusCode != 200 {
+		return nil, app.handleError(statusCode, respBody, action)
+	}
+
+	return respBody, nil
+}
+
+// monitorJobStatus monitors the status of a crawl job using the Firecrawl API.
+//
+// Parameters:
+//   - jobID: The ID of the crawl job to monitor.
+//   - headers: The headers to be included in the request.
+//   - pollInterval: The interval (in seconds) at which to poll the job status.
+//
+// Returns:
+//   - []*FirecrawlDocument: The crawl result if the job is completed.
+//   - error: An error if the crawl status check request fails.
+func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) {
+	for {
+		resp, err := app.makeRequest(
+			http.MethodGet,
+			fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID),
+			nil,
+			headers,
+			"check crawl status",
+			withRetries(3),
+			withBackoff(500),
+		)
+		if err != nil {
+			return nil, err
+		}
+
+		var statusData JobStatusResponse
+		err = json.Unmarshal(resp, &statusData)
+		if err != nil {
+			return nil, err
+		}
+
+		status := statusData.Status
+		if status == "" {
+			return nil, fmt.Errorf("invalid status in response")
+		}
+
+		if status == "completed" {
+			if statusData.Data != nil {
+				return statusData.Data, nil
+			}
+			return nil, fmt.Errorf("crawl job completed but no data was returned")
+		} else if status == "active" || status == "paused" || status == "pending" || status == "queued" || status == "waiting" {
+			pollInterval = max(pollInterval, 2)
+			time.Sleep(time.Duration(pollInterval) * time.Second)
+		} else {
+			return nil, fmt.Errorf("crawl job failed or was stopped. Status: %s", status)
+		}
+	}
+}
+
+// handleError handles errors returned by the Firecrawl API.
+//
+// Parameters:
+//   - resp: The HTTP response object.
+//   - body: The response body from the HTTP response.
+//   - action: A string describing the action being performed.
+//
+// Returns:
+//   - error: An error describing the failure reason.
+func (app *FirecrawlApp) handleError(statusCode int, body []byte, action string) error {
+	var errorData map[string]any
+	err := json.Unmarshal(body, &errorData)
+	if err != nil {
+		return fmt.Errorf("failed to parse error response: %v", err)
+	}
+
+	errorMessage, _ := errorData["error"].(string)
+	if errorMessage == "" {
+		errorMessage = "No additional error details provided."
+	}
+
+	var message string
+	switch statusCode {
+	case 402:
+		message = fmt.Sprintf("Payment Required: Failed to %s. %s", action, errorMessage)
+	case 408:
+		message = fmt.Sprintf("Request Timeout: Failed to %s as the request timed out. %s", action, errorMessage)
+	case 409:
+		message = fmt.Sprintf("Conflict: Failed to %s due to a conflict. %s", action, errorMessage)
+	case 500:
+		message = fmt.Sprintf("Internal Server Error: Failed to %s. %s", action, errorMessage)
+	default:
+		message = fmt.Sprintf("Unexpected error during %s: Status code %d. %s", action, statusCode, errorMessage)
+	}
+
+	return fmt.Errorf(message)
+}
@@ -0,0 +1,292 @@
+package firecrawl
+
+import (
+	"log"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/joho/godotenv"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+var API_URL string
+var TEST_API_KEY string
+
+func init() {
+	err := godotenv.Load()
+	if err != nil {
+		log.Fatalf("Error loading .env file: %v", err)
+	}
+	API_URL = os.Getenv("API_URL")
+	TEST_API_KEY = os.Getenv("TEST_API_KEY")
+}
+
+func TestNoAPIKey(t *testing.T) {
+	_, err := NewFirecrawlApp("", API_URL)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "no API key provided")
+}
+
+func TestScrapeURLInvalidAPIKey(t *testing.T) {
+	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
+	require.NoError(t, err)
+
+	_, err = app.ScrapeURL("https://firecrawl.dev", nil)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token")
+}
+
+func TestBlocklistedURL(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	_, err = app.ScrapeURL("https://facebook.com/fake-test", nil)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
+}
+
+func TestSuccessfulResponseWithValidPreviewToken(t *testing.T) {
+	app, err := NewFirecrawlApp("this_is_just_a_preview_token", API_URL)
+	require.NoError(t, err)
+
+	response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.Content, "_Roast_")
+}
+
+func TestScrapeURLE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.Content, "_Roast_")
+	assert.NotEqual(t, response.Markdown, "")
+	assert.NotNil(t, response.Metadata)
+	assert.Equal(t, response.HTML, "")
+}
+
+func TestSuccessfulResponseWithValidAPIKeyAndIncludeHTML(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := map[string]any{
+		"pageOptions": map[string]any{
+			"includeHtml": true,
+		},
+	}
+	response, err := app.ScrapeURL("https://roastmywebsite.ai", params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.Content, "_Roast_")
+	assert.Contains(t, response.Markdown, "_Roast_")
+	assert.Contains(t, response.HTML, "<h1")
+	assert.NotNil(t, response.Metadata)
+}
+
+func TestSuccessfulResponseForValidScrapeWithPDFFile(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001.pdf", nil)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
+	assert.NotNil(t, response.Metadata)
+}
+
+func TestSuccessfulResponseForValidScrapeWithPDFFileWithoutExplicitExtension(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001", nil)
+	time.Sleep(6 * time.Second) // wait for 6 seconds
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
+	assert.NotNil(t, response.Metadata)
+}
+
+func TestCrawlURLInvalidAPIKey(t *testing.T) {
+	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
+	require.NoError(t, err)
+
+	_, err = app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token")
+}
+
+func TestShouldReturnErrorForBlocklistedURL(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	_, err = app.CrawlURL("https://twitter.com/fake-test", nil, false, 2, "")
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
+}
+
+func TestCrawlURLWaitForCompletionE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := map[string]any{
+		"crawlerOptions": map[string]any{
+			"excludes": []string{"blog/*"},
+		},
+	}
+	response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, "")
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	data, ok := response.([]*FirecrawlDocument)
+	assert.True(t, ok)
+	assert.Greater(t, len(data), 0)
+	assert.Contains(t, data[0].Content, "_Roast_")
+}
+
+func TestCrawlURLWithIdempotencyKeyE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	uniqueIdempotencyKey := uuid.New().String()
+	params := map[string]any{
+		"crawlerOptions": map[string]any{
+			"excludes": []string{"blog/*"},
+		},
+	}
+	response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, uniqueIdempotencyKey)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	data, ok := response.([]*FirecrawlDocument)
+	assert.True(t, ok)
+	assert.Greater(t, len(data), 0)
+	assert.Contains(t, data[0].Content, "_Roast_")
+
+	_, err = app.CrawlURL("https://firecrawl.dev", params, true, 2, uniqueIdempotencyKey)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used")
+}
+
+func TestCheckCrawlStatusE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := map[string]any{
+		"crawlerOptions": map[string]any{
+			"excludes": []string{"blog/*"},
+		},
+	}
+	response, err := app.CrawlURL("https://firecrawl.dev", params, false, 2, "")
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	jobID, ok := response.(string)
+	assert.True(t, ok)
+	assert.NotEqual(t, "", jobID)
+
+	time.Sleep(30 * time.Second) // wait for 30 seconds
+
+	statusResponse, err := app.CheckCrawlStatus(jobID)
+	require.NoError(t, err)
+	assert.NotNil(t, statusResponse)
+
+	assert.Equal(t, "completed", statusResponse.Status)
+	assert.Greater(t, len(statusResponse.Data), 0)
+}
+
+func TestSearchE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	response, err := app.Search("test query", nil)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Greater(t, len(response), 2)
+	assert.NotEqual(t, response[0].Content, "")
+}
+
+func TestSearchInvalidAPIKey(t *testing.T) {
+	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
+	require.NoError(t, err)
+
+	_, err = app.Search("test query", nil)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during search: Status code 401. Unauthorized: Invalid token")
+}
+
+func TestLLMExtraction(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := map[string]any{
+		"extractorOptions": ExtractorOptions{
+			Mode:             "llm-extraction",
+			ExtractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+			ExtractionSchema: map[string]any{
+				"type": "object",
+				"properties": map[string]any{
+					"company_mission": map[string]string{"type": "string"},
+					"supports_sso":    map[string]string{"type": "boolean"},
+					"is_open_source":  map[string]string{"type": "boolean"},
+				},
+				"required": []string{"company_mission", "supports_sso", "is_open_source"},
+			},
+		},
+	}
+
+	response, err := app.ScrapeURL("https://mendable.ai", params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	assert.Contains(t, response.LLMExtraction, "company_mission")
+	assert.IsType(t, true, response.LLMExtraction["supports_sso"])
+	assert.IsType(t, true, response.LLMExtraction["is_open_source"])
+}
+
+func TestCancelCrawlJobInvalidAPIKey(t *testing.T) {
+	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
+	require.NoError(t, err)
+
+	_, err = app.CancelCrawlJob("test query")
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Unexpected error during cancel crawl job: Status code 401. Unauthorized: Invalid token")
+}
+
+func TestCancelNonExistingCrawlJob(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	jobID := uuid.New().String()
+	_, err = app.CancelCrawlJob(jobID)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "Job not found")
+}
+
+func TestCancelCrawlJobE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	response, err := app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+
+	jobID, ok := response.(string)
+	assert.True(t, ok)
+	assert.NotEqual(t, "", jobID)
+
+	status, err := app.CancelCrawlJob(jobID)
+	require.NoError(t, err)
+	assert.Equal(t, "cancelled", status)
+}