This commit is contained in:
Ryan Chen
2025-06-26 20:19:21 -04:00
parent 948f36ffc1
commit ff29bccb95
23 changed files with 11614 additions and 11507 deletions

530
src/utils/articleUtils.ts Normal file
View File

@@ -0,0 +1,530 @@
import AsyncStorage from '@react-native-async-storage/async-storage';
/**
 * A saved article as persisted (JSON-serialised) in the AsyncStorage
 * `articles` list used throughout this module.
 */
export interface Article {
// Identifier used for lookup, update and deletion. Manual saves use the
// millisecond clock as a string; archive imports use
// `archive_<timestamp>_<index>`.
id: string;
// Display title (caller-supplied, extracted from the HTML, or a placeholder).
title: string;
// Address of the article that was saved.
url: string;
// Downloaded page markup; populated by the code paths that store `type: 'html'`.
htmlContent?: string;
// ISO-8601 instant at which the article was stored locally.
archivedAt: string;
// 'html' when the markup was downloaded and stored, 'link' when only the URL was kept.
type: 'link' | 'html';
// Origin of the entry. 'archive_api' is set by the archive import;
// NOTE(review): nothing visible in this file assigns 'manual' - confirm usage.
source?: 'manual' | 'archive_api';
// Snapshot timestamp reported by the archive API for imported articles.
timestamp?: string;
// Read flag maintained by markArticleAsRead / markArticleAsUnread.
isRead?: boolean;
// ISO-8601 instant at which the article was marked as read.
readAt?: string;
}
/**
 * Downloads the body of `url` as text.
 *
 * @param url - Address to request with a plain GET.
 * @returns The response body as a string.
 * @throws Error - "Failed to fetch HTML from <url>: <cause>" when the request
 *   fails or the response status is not OK. The cause is also logged.
 */
export const fetchHtmlFromUrl = async (url: string): Promise<string> => {
  try {
    const reply = await fetch(url);
    if (reply.ok) {
      // Awaited inside the try block so body-read failures are wrapped as well.
      return await reply.text();
    }
    throw new Error(`HTTP error! status: ${reply.status}`);
  } catch (cause) {
    console.error('Error fetching HTML:', cause);
    throw new Error(`Failed to fetch HTML from ${url}: ${cause}`);
  }
};
/** Named character references that commonly appear in page titles. */
const TITLE_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

/**
 * Turns the raw inner markup of a heading/title element into display text:
 * drops nested tags, decodes common entities and collapses whitespace.
 */
const cleanTitleText = (raw: string): string => {
  // Tags are removed before entities are decoded so that an escaped "&lt;b&gt;"
  // survives as literal text instead of being mistaken for markup.
  const withoutTags = raw.replace(/<[^>]*>/g, ' ');
  const decoded = withoutTags.replace(
    /&(#x[0-9a-f]+|#\d+|[a-z]+);/gi,
    (whole: string, body: string): string => {
      if (body.startsWith('#')) {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // Out-of-range references are left untouched rather than throwing.
        return codePoint > 0 && codePoint <= 0x10ffff
          ? String.fromCodePoint(codePoint)
          : whole;
      }
      return TITLE_NAMED_ENTITIES.get(body.toLowerCase()) ?? whole;
    },
  );
  return decoded.replace(/\s+/g, ' ').trim();
};

/**
 * Extracts a human-readable title from HTML content.
 *
 * The first `<title>` element is preferred; if it is missing or contains no
 * visible text, the first `<h1>` is used instead. Nested markup inside either
 * element is stripped, common HTML entities are decoded and runs of
 * whitespace are collapsed.
 *
 * @param html - Markup to search.
 * @returns The extracted title, or `'Untitled Article'` when neither element
 *   yields any text (or when extraction fails unexpectedly).
 */
export const extractTitleFromHtml = (html: string): string => {
  const fallbackTitle = 'Untitled Article';
  const candidatePatterns = [
    /<title\b[^>]*>([\s\S]*?)<\/title\s*>/i,
    /<h1\b[^>]*>([\s\S]*?)<\/h1\s*>/i,
  ];
  try {
    for (const pattern of candidatePatterns) {
      const match = html.match(pattern);
      const text = cleanTitleText(match?.[1] ?? '');
      // An empty result (e.g. "<title> </title>") falls through to the next
      // candidate instead of being returned as a blank title.
      if (text) {
        return text;
      }
    }
    return fallbackTitle;
  } catch (error) {
    console.error('Error extracting title:', error);
    return fallbackTitle;
  }
};
/**
 * Downloads a page and stores it, markup included, at the front of the
 * persisted `articles` list.
 *
 * @param url - Page to download and save.
 * @param title - Optional title; when omitted or empty the title is extracted
 *   from the downloaded markup.
 * @returns The article record that was stored.
 * @throws Re-throws (after logging) any download, parse or storage failure.
 */
export const saveHtmlArticle = async (
  url: string,
  title?: string,
): Promise<Article> => {
  try {
    const pageHtml = await fetchHtmlFromUrl(url);
    const resolvedTitle = title ? title : extractTitleFromHtml(pageHtml);

    const saved: Article = {
      id: String(Date.now()),
      title: resolvedTitle,
      url,
      htmlContent: pageHtml,
      archivedAt: new Date().toISOString(),
      type: 'html',
    };

    // Read the current list, then write it back with the new entry first.
    const serialized = await AsyncStorage.getItem('articles');
    let previous: Article[] = [];
    if (serialized) {
      previous = JSON.parse(serialized);
    }
    await AsyncStorage.setItem(
      'articles',
      JSON.stringify([saved, ...previous]),
    );

    return saved;
  } catch (failure) {
    console.error('Error saving HTML article:', failure);
    throw failure;
  }
};
/**
 * Stores a link-only article (no downloaded markup) at the front of the
 * persisted `articles` list.
 *
 * @param url - Address to remember.
 * @param title - Optional title; `'Untitled Article'` is used when omitted or empty.
 * @returns The article record that was stored.
 * @throws Re-throws (after logging) any parse or storage failure.
 */
export const saveLinkArticle = async (
  url: string,
  title?: string,
): Promise<Article> => {
  try {
    const link: Article = {
      id: String(Date.now()),
      title: title ? title : 'Untitled Article',
      url,
      archivedAt: new Date().toISOString(),
      type: 'link',
    };

    const serialized = await AsyncStorage.getItem('articles');
    let previous: Article[] = [];
    if (serialized) {
      previous = JSON.parse(serialized);
    }

    // Newest entry goes first.
    await AsyncStorage.setItem('articles', JSON.stringify([link, ...previous]));
    return link;
  } catch (failure) {
    console.error('Error saving link article:', failure);
    throw failure;
  }
};
/**
 * Reads the persisted `articles` list.
 *
 * @returns The parsed list, or an empty array when nothing is stored or when
 *   reading/parsing fails (the failure is logged, not thrown).
 */
export const loadArticles = async (): Promise<Article[]> => {
  try {
    const serialized = await AsyncStorage.getItem('articles');
    if (!serialized) {
      return [];
    }
    return JSON.parse(serialized);
  } catch (failure) {
    console.error('Error loading articles:', failure);
    return [];
  }
};
/**
 * Removes the article with the given id from the persisted `articles` list.
 *
 * Storage is read directly rather than through `loadArticles`, because that
 * helper turns read/parse failures into an empty list; writing the filtered
 * result back would then silently replace an unreadable store with `[]`.
 * Here such failures are propagated and storage is left untouched.
 *
 * @param articleId - Identifier of the article to remove.
 * @throws Re-throws (after logging) storage failures, JSON parse failures, or
 *   an Error when the stored value is not an array.
 */
export const deleteArticle = async (articleId: string): Promise<void> => {
  try {
    const serialized = await AsyncStorage.getItem('articles');
    if (!serialized) {
      // Nothing stored, so there is nothing to delete.
      return;
    }

    const parsed: unknown = JSON.parse(serialized);
    if (!Array.isArray(parsed)) {
      throw new Error('Stored articles are not an array');
    }
    const articles: Article[] = parsed;

    const remaining = articles.filter(article => article.id !== articleId);
    if (remaining.length === articles.length) {
      // Unknown id: skip the redundant write.
      return;
    }
    await AsyncStorage.setItem('articles', JSON.stringify(remaining));
  } catch (error) {
    console.error('Error deleting article:', error);
    throw error;
  }
};
/**
 * Checks whether a URL answers a HEAD request with a successful status.
 *
 * Only the status is inspected; the response content type is not checked.
 *
 * @param url - Address to probe.
 * @returns The response's `ok` flag, or `false` when the request itself fails
 *   (the failure is logged, not thrown).
 */
export const validateHtmlUrl = async (url: string): Promise<boolean> => {
  let reachable = false;
  try {
    const probe = await fetch(url, { method: 'HEAD' });
    reachable = probe.ok;
  } catch (failure) {
    console.error('Error validating URL:', failure);
  }
  return reachable;
};
/**
 * Issues a JSON GET request authenticated with the `X-ArchiveBox-API-Key`
 * header.
 *
 * @param url - Address to request.
 * @param apiKey - Value sent in the `X-ArchiveBox-API-Key` header.
 * @returns The raw response when its status is OK.
 * @throws Error - "HTTP error! status: <code>" for a non-OK status; network
 *   failures are re-thrown unchanged. Every failure is logged first.
 */
export const makeGetRequest = async (
  url: string,
  apiKey: string,
): Promise<Response> => {
  const requestInit = {
    method: 'GET',
    headers: {
      accept: 'application/json',
      'X-ArchiveBox-API-Key': apiKey,
    },
  };

  try {
    const reply = await fetch(url, requestInit);
    if (reply.ok) {
      return reply;
    }
    throw new Error(`HTTP error! status: ${reply.status}`);
  } catch (failure) {
    console.error('Error making GET request:', failure);
    throw failure;
  }
};
/**
 * Fetches archive results from an archive server and imports them.
 *
 * Several candidate endpoint paths are tried in order; the first one that
 * answers with an OK status and a JSON body is handed to
 * `processArchiveResults`, whose result is returned.
 *
 * @param baseUrl - Server root; trailing slashes/colons are stripped before
 *   the endpoint paths are appended.
 * @param apiKey - Sent both as `X-ArchiveBox-API-Key` and as a Bearer token.
 * @returns The value produced by `processArchiveResults` for the first
 *   endpoint that succeeds.
 * @throws Error - When every endpoint fails. The message distinguishes
 *   network failures, authentication failures (401), missing endpoints (404)
 *   and everything else, based on the last endpoint's error.
 */
export const fetchArchiveResults = async (
  baseUrl: string,
  apiKey: string,
): Promise<any> => {
  // Declared outside the try block: the catch block below reports the list of
  // attempted URLs, and block-scoped bindings from inside `try` would not be
  // visible there.
  const cleanBaseUrl = baseUrl.replace(/[\/:]+$/, '');
  const possibleEndpoints = [
    `${cleanBaseUrl}/api/v1/core/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/api/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/api/v1/archiveresults?limit=200&extractor=title`,
  ];

  try {
    console.log('=== API REQUEST DEBUG ===');
    console.log('Original baseUrl:', baseUrl);
    console.log('Cleaned baseUrl:', cleanBaseUrl);
    console.log('Attempting to fetch from endpoints:', possibleEndpoints);
    // Only the presence of the key is logged, never its value.
    console.log('Using API key:', apiKey ? 'Present' : 'Missing');

    let lastError: Error | null = null;
    for (const endpoint of possibleEndpoints) {
      try {
        console.log(`\n🔗 Trying endpoint: ${endpoint}`);
        const response = await fetch(endpoint, {
          method: 'GET',
          headers: {
            Accept: 'application/json',
            'Content-Type': 'application/json',
            'X-ArchiveBox-API-Key': apiKey,
            Authorization: `Bearer ${apiKey}`,
            'User-Agent': 'PocketDog/1.0',
          },
        });
        console.log(`✅ Response status: ${response.status}`);
        console.log(
          `📋 Response headers:`,
          Object.fromEntries(response.headers.entries()),
        );

        if (!response.ok) {
          const errorText = await response.text();
          console.log(`❌ Error response body: ${errorText}`);
          throw new Error(`HTTP ${response.status}: ${errorText}`);
        }

        const data = await response.json();
        console.log('🎉 Successfully fetched data:', data);

        // Parse the results and download HTML content.
        return await processArchiveResults(data, cleanBaseUrl);
      } catch (error) {
        console.log(`❌ Failed to fetch from ${endpoint}:`, error);
        // Normalise non-Error throwables so the reporting below can rely on
        // `.message`.
        lastError = error instanceof Error ? error : new Error(String(error));
      }
    }

    // Every endpoint failed: surface the last failure.
    throw lastError || new Error('All API endpoints failed');
  } catch (error) {
    console.error('🚨 Error fetching archive results:', error);

    const message = error instanceof Error ? error.message : String(error);
    const attemptedUrls = possibleEndpoints.join(', ');

    // fetch reports transport failures as a TypeError whose wording differs
    // per runtime ("Failed to fetch", "fetch failed", "Network request
    // failed"), so both keywords are accepted.
    if (error instanceof TypeError && /fetch|network/i.test(message)) {
      throw new Error(
        `Network error: Failed to reach ${baseUrl}. Tried endpoints: ${attemptedUrls}. Please check your internet connection and ensure the server is running.`,
      );
    }
    if (error instanceof Error && message.includes('401')) {
      throw new Error(
        `Authentication failed: Please check your API key. Tried URL: ${baseUrl}`,
      );
    }
    if (error instanceof Error && message.includes('404')) {
      throw new Error(
        `API endpoint not found: Tried multiple endpoints on ${baseUrl}. Attempted URLs: ${attemptedUrls}`,
      );
    }
    throw new Error(
      `Failed to fetch archive results from ${baseUrl}. Tried URLs: ${attemptedUrls}. Error: ${message}`,
    );
  }
};
/**
 * Turns an archive-results API payload into locally stored articles.
 *
 * For every item that carries a timestamp, the extracted article markup is
 * downloaded from `{baseUrl}/archive/{timestamp}/mercury/content.html` and
 * saved through `saveArticleToStorage`. Per-item download failures are
 * recorded on the item (`downloadError`) and do not stop the run.
 *
 * @param data - Decoded API response; the item list is read from `items`,
 *   `results` or `data`, whichever is present first.
 * @param baseUrl - Server root (without trailing slash) used to build the
 *   download URLs.
 * @returns The original payload fields plus `processedItems`,
 *   `totalProcessed`, `downloadedArticles` and `totalDownloaded`. The
 *   full-run result also echoes `baseUrl`; the unexpected-failure result
 *   carries an `error` string instead.
 */
const processArchiveResults = async (data: any, baseUrl: string) => {
  // Caught values are not guaranteed to be Error instances.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  try {
    console.log('📊 Processing archive results...');

    const items = data.items || data.results || data.data || [];
    if (!Array.isArray(items)) {
      console.log('⚠️ No items array found in response:', data);
      return {
        ...data,
        processedItems: [],
        totalProcessed: 0,
        downloadedArticles: [],
        totalDownloaded: 0,
      };
    }

    console.log(`📦 Found ${items.length} items to process`);
    const processedItems: any[] = [];
    const downloadedArticles: Article[] = [];

    // Items are handled one at a time on purpose: saveArticleToStorage does a
    // read-modify-write of the whole list, so concurrent saves would race.
    for (const [index, item] of items.entries()) {
      const timestamp =
        item.snapshot_timestamp ||
        item.timestamp ||
        item.date ||
        item.created_at ||
        item.archived_at;

      if (!timestamp) {
        console.log(`⚠️ No timestamp found for item ${index}:`, item);
        processedItems.push({
          ...item,
          downloadableUrl: null,
          error: 'No timestamp found',
        });
        continue;
      }

      // Download location: {base_url}/archive/{timestamp}/mercury/content.html
      const downloadableUrl = `${baseUrl}/archive/${timestamp}/mercury/content.html`;
      console.log(
        `🔗 Item ${index}: ${
          item.title || item.url || 'Untitled'
        } -> ${downloadableUrl}`,
      );

      const processedItem = {
        ...item,
        downloadableUrl,
        // `snapshot_url` is presumably where ArchiveBox archive results carry
        // the original address (alongside `snapshot_timestamp`) - verify
        // against the server's API schema.
        originalUrl:
          item.url || item.snapshot_url || item.original_url || item.link,
        title: item.output || item.title || item.name || 'Untitled',
        timestamp: timestamp,
      };
      processedItems.push(processedItem);

      try {
        console.log(`📥 Downloading HTML for item ${index}...`);
        const htmlContent = await fetchHtmlFromUrl(downloadableUrl);

        const article: Article = {
          id: `archive_${timestamp}_${index}`,
          title: processedItem.title,
          // Article.url is required; fall back to the archive copy when the
          // item exposes no original address.
          url: processedItem.originalUrl || downloadableUrl,
          htmlContent: htmlContent,
          archivedAt: new Date().toISOString(),
          type: 'html',
          source: 'archive_api',
          timestamp: timestamp,
        };

        await saveArticleToStorage(article);
        downloadedArticles.push(article);
        console.log(
          `✅ Successfully downloaded and saved article: ${article.title}`,
        );
      } catch (downloadError) {
        console.log(
          `❌ Failed to download HTML for item ${index}:`,
          downloadError,
        );
        // processedItem is already in processedItems, so the caller sees this.
        processedItem.downloadError = describeError(downloadError);
      }
    }

    console.log(
      `✅ Successfully processed ${processedItems.length} items and downloaded ${downloadedArticles.length} articles`,
    );
    return {
      ...data,
      processedItems,
      totalProcessed: processedItems.length,
      downloadedArticles,
      totalDownloaded: downloadedArticles.length,
      baseUrl: baseUrl,
    };
  } catch (error) {
    console.error('❌ Error processing archive results:', error);
    return {
      ...data,
      processedItems: [],
      totalProcessed: 0,
      downloadedArticles: [],
      totalDownloaded: 0,
      error: `Processing error: ${describeError(error)}`,
    };
  }
};
/**
 * Inserts or updates an article in the persisted `articles` list.
 *
 * An article whose id is already stored replaces the stored entry in place;
 * otherwise it is added at the front. When replacing, the stored read state
 * (`isRead` / `readAt`) is kept unless the incoming article sets its own, so
 * re-importing an article does not silently mark it as unread.
 *
 * @param article - Article to store.
 * @throws Re-throws (after logging) any parse or storage failure.
 */
const saveArticleToStorage = async (article: Article): Promise<void> => {
  try {
    const serialized = await AsyncStorage.getItem('articles');
    const articles: Article[] = serialized ? JSON.parse(serialized) : [];

    const existingIndex = articles.findIndex(a => a.id === article.id);
    const previous = existingIndex >= 0 ? articles[existingIndex] : undefined;

    if (previous) {
      // Undefined fields are dropped by JSON.stringify, so an article that
      // was never read still serialises without isRead/readAt keys.
      articles[existingIndex] = {
        ...article,
        isRead: article.isRead ?? previous.isRead,
        readAt: article.readAt ?? previous.readAt,
      };
    } else {
      // New article: newest first.
      articles.unshift(article);
    }

    await AsyncStorage.setItem('articles', JSON.stringify(articles));
  } catch (error) {
    console.error('Error saving article to storage:', error);
    throw error;
  }
};
/**
 * Marks an article as read and records when that happened.
 *
 * Storage is read directly rather than through `loadArticles`, because that
 * helper turns read/parse failures into an empty list; writing the mapped
 * result back would then silently replace an unreadable store with `[]`.
 * Here such failures are propagated and storage is left untouched.
 *
 * @param articleId - Identifier of the article to update; unknown ids are a
 *   no-op.
 * @throws Re-throws (after logging) storage failures, JSON parse failures, or
 *   an Error when the stored value is not an array.
 */
export const markArticleAsRead = async (articleId: string): Promise<void> => {
  try {
    const serialized = await AsyncStorage.getItem('articles');
    if (!serialized) {
      return;
    }

    const parsed: unknown = JSON.parse(serialized);
    if (!Array.isArray(parsed)) {
      throw new Error('Stored articles are not an array');
    }
    const articles: Article[] = parsed;

    if (!articles.some(article => article.id === articleId)) {
      // Nothing to change: skip the redundant write.
      return;
    }

    const readAt = new Date().toISOString();
    const updatedArticles = articles.map(article =>
      article.id === articleId ? { ...article, isRead: true, readAt } : article,
    );
    await AsyncStorage.setItem('articles', JSON.stringify(updatedArticles));
  } catch (error) {
    console.error('Error marking article as read:', error);
    throw error;
  }
};
/**
 * Marks an article as unread and clears its read time.
 *
 * Storage is read directly rather than through `loadArticles`, because that
 * helper turns read/parse failures into an empty list; writing the mapped
 * result back would then silently replace an unreadable store with `[]`.
 * Here such failures are propagated and storage is left untouched.
 *
 * @param articleId - Identifier of the article to update; unknown ids are a
 *   no-op.
 * @throws Re-throws (after logging) storage failures, JSON parse failures, or
 *   an Error when the stored value is not an array.
 */
export const markArticleAsUnread = async (articleId: string): Promise<void> => {
  try {
    const serialized = await AsyncStorage.getItem('articles');
    if (!serialized) {
      return;
    }

    const parsed: unknown = JSON.parse(serialized);
    if (!Array.isArray(parsed)) {
      throw new Error('Stored articles are not an array');
    }
    const articles: Article[] = parsed;

    if (!articles.some(article => article.id === articleId)) {
      // Nothing to change: skip the redundant write.
      return;
    }

    const updatedArticles = articles.map(article =>
      article.id === articleId
        ? // readAt: undefined is dropped by JSON.stringify, removing the key.
          { ...article, isRead: false, readAt: undefined }
        : article,
    );
    await AsyncStorage.setItem('articles', JSON.stringify(updatedArticles));
  } catch (error) {
    console.error('Error marking article as unread:', error);
    throw error;
  }
};
/**
 * Summarises how many stored articles have been read.
 *
 * @returns Counts of all, read and unread articles; all zeros when the
 *   articles cannot be tallied (the failure is logged, not thrown).
 */
export const getReadStats = async (): Promise<{
  total: number;
  read: number;
  unread: number;
}> => {
  try {
    const all = await loadArticles();
    // Any truthy isRead counts as read.
    const read = all.reduce(
      (count, entry) => (entry.isRead ? count + 1 : count),
      0,
    );
    const total = all.length;
    return { total, read, unread: total - read };
  } catch (failure) {
    console.error('Error getting read stats:', failure);
    return { total: 0, read: 0, unread: 0 };
  }
};