initial
This commit is contained in:
530
src/utils/articleUtils.ts
Normal file
530
src/utils/articleUtils.ts
Normal file
@@ -0,0 +1,530 @@
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
|
||||
/**
 * Shape of one saved article as it is serialized into the `articles`
 * AsyncStorage entry by the helpers in this module.
 */
export interface Article {
  /**
   * Identifier used for lookups. This module generates either a
   * millisecond-timestamp string (manual saves) or
   * `archive_<timestamp>_<index>` (archive imports).
   */
  id: string;
  /** Display title. */
  title: string;
  /** Address the article was saved from. */
  url: string;
  /** `'html'` when `htmlContent` was downloaded, `'link'` for a bare bookmark. */
  type: 'link' | 'html';
  /** ISO-8601 time at which the article object was created locally. */
  archivedAt: string;

  /** Downloaded page markup; only set by the HTML-saving code paths. */
  htmlContent?: string;
  /** Origin of the entry; the archive importer sets `'archive_api'`. */
  source?: 'manual' | 'archive_api';
  /** Snapshot timestamp reported by the archive API for imported entries. */
  timestamp?: string;
  /** Read flag toggled by `markArticleAsRead` / `markArticleAsUnread`. */
  isRead?: boolean;
  /** ISO-8601 time at which the article was marked as read. */
  readAt?: string;
}
|
||||
|
||||
/**
 * Downloads the body of `url` as text.
 *
 * @param url - Address to request with a plain `fetch(url)`.
 * @returns The response body as a string.
 * @throws Error prefixed with `Failed to fetch HTML from <url>:` when the
 *   request rejects, the status is not OK, or the body cannot be read. The
 *   underlying failure is logged and appended to the message.
 */
export const fetchHtmlFromUrl = async (url: string): Promise<string> => {
  try {
    const res = await fetch(url);

    if (res.ok) {
      // `return await` keeps a body-read failure inside this try/catch.
      return await res.text();
    }

    throw new Error(`HTTP error! status: ${res.status}`);
  } catch (cause) {
    console.error('Error fetching HTML:', cause);
    throw new Error(`Failed to fetch HTML from ${url}: ${cause}`);
  }
};
|
||||
|
||||
/**
 * Extracts a human-readable title from an HTML document.
 *
 * The first `<title>` element is tried first, then the first `<h1>` element;
 * the first candidate with non-blank text wins. Markup nested inside the
 * heading (e.g. `<h1><span>…</span></h1>`) is stripped. HTML entities are
 * returned as written (they are not decoded).
 *
 * @param html - Raw HTML source.
 * @returns The trimmed title text, or `'Untitled Article'` when neither
 *   element yields any text.
 */
export const extractTitleFromHtml = (html: string): string => {
  const fallbackTitle = 'Untitled Article';

  try {
    // Lazy match so content containing a stray `<` no longer defeats the search.
    const titleText = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)?.[1]?.trim();
    if (titleText) {
      return titleText;
    }

    // A blank or missing <title> falls through to the first heading.
    const headingText = /<h1[^>]*>([\s\S]*?)<\/h1>/i
      .exec(html)?.[1]
      ?.replace(/<[^>]*>/g, '') // drop nested tags, keep their text
      .trim();
    if (headingText) {
      return headingText;
    }

    return fallbackTitle;
  } catch (error) {
    console.error('Error extracting title:', error);
    return fallbackTitle;
  }
};
|
||||
|
||||
/**
 * Downloads a page and stores it as an `'html'` article at the front of the
 * `articles` list in AsyncStorage.
 *
 * @param url - Page to download and record as the article's `url`.
 * @param title - Optional title; when empty or omitted the title is taken
 *   from the downloaded markup via `extractTitleFromHtml`.
 * @returns The article object that was written.
 * @throws Re-throws (after logging) any failure from the download, from
 *   parsing the stored list, or from writing it back.
 */
export const saveHtmlArticle = async (
  url: string,
  title?: string,
): Promise<Article> => {
  try {
    const html = await fetchHtmlFromUrl(url);
    const resolvedTitle = title ? title : extractTitleFromHtml(html);

    // NOTE(review): ids are millisecond timestamps, so two saves within the
    // same millisecond would share an id — confirm whether that matters.
    const saved: Article = {
      id: Date.now().toString(),
      title: resolvedTitle,
      url,
      htmlContent: html,
      archivedAt: new Date().toISOString(),
      type: 'html',
    };

    const raw = await AsyncStorage.getItem('articles');
    const stored: Article[] = raw ? JSON.parse(raw) : [];

    // Newest first.
    await AsyncStorage.setItem('articles', JSON.stringify([saved, ...stored]));

    return saved;
  } catch (err) {
    console.error('Error saving HTML article:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Stores a bookmark-only (`'link'`) article at the front of the `articles`
 * list in AsyncStorage. Nothing is downloaded.
 *
 * @param url - Address to bookmark.
 * @param title - Optional title; `'Untitled Article'` is used when empty or omitted.
 * @returns The article object that was written.
 * @throws Re-throws (after logging) any failure from parsing the stored list
 *   or from writing it back.
 */
export const saveLinkArticle = async (
  url: string,
  title?: string,
): Promise<Article> => {
  try {
    const saved: Article = {
      id: Date.now().toString(),
      title: title ? title : 'Untitled Article',
      url,
      archivedAt: new Date().toISOString(),
      type: 'link',
    };

    const raw = await AsyncStorage.getItem('articles');
    const stored: Article[] = raw ? JSON.parse(raw) : [];

    // Newest first.
    await AsyncStorage.setItem('articles', JSON.stringify([saved, ...stored]));

    return saved;
  } catch (err) {
    console.error('Error saving link article:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Loads every article stored under the `articles` AsyncStorage key.
 *
 * This function never rejects: a missing entry, unreadable JSON, or a stored
 * value that is not an array all yield an empty list (the latter two are
 * logged), so callers can always iterate the result.
 *
 * @returns The stored articles, or `[]` when nothing usable is stored.
 */
export const loadArticles = async (): Promise<Article[]> => {
  try {
    const raw = await AsyncStorage.getItem('articles');
    if (!raw) {
      return [];
    }

    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      // e.g. a stored `null` or `{}`; returning it would break `.map`/`.filter` in callers.
      console.error(
        'Error loading articles:',
        new Error('Stored "articles" value is not an array'),
      );
      return [];
    }

    // Element shapes are not validated; entries are trusted to be Articles.
    return parsed as Article[];
  } catch (error) {
    console.error('Error loading articles:', error);
    return [];
  }
};
|
||||
|
||||
/**
 * Removes every stored article whose `id` equals `articleId` and writes the
 * remaining list back to the `articles` AsyncStorage key.
 *
 * @param articleId - Identifier of the article to remove.
 * @throws Re-throws (after logging) any failure while loading or writing.
 */
export const deleteArticle = async (articleId: string): Promise<void> => {
  const isKept = (entry: Article): boolean => entry.id !== articleId;

  try {
    const remaining = (await loadArticles()).filter(isKept);
    await AsyncStorage.setItem('articles', JSON.stringify(remaining));
  } catch (err) {
    console.error('Error deleting article:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Checks whether `url` answers a `HEAD` request with an OK status.
 *
 * Only the status is inspected; the response's content type is not checked,
 * so a `true` result does not prove the resource is HTML.
 *
 * @param url - Address to probe.
 * @returns The response's `ok` flag, or `false` when the request fails
 *   (the failure is logged).
 */
export const validateHtmlUrl = async (url: string): Promise<boolean> => {
  let reachable = false;

  try {
    const { ok } = await fetch(url, { method: 'HEAD' });
    reachable = ok;
  } catch (err) {
    console.error('Error validating URL:', err);
  }

  return reachable;
};
|
||||
|
||||
/**
 * Issues a JSON `GET` request authenticated with the `X-ArchiveBox-API-Key`
 * header.
 *
 * @param url - Address to request.
 * @param apiKey - Value sent in the `X-ArchiveBox-API-Key` header.
 * @returns The response, only when its status is OK.
 * @throws Error `HTTP error! status: <code>` for a non-OK status, or the
 *   original rejection from `fetch`; either is logged before being re-thrown.
 */
export const makeGetRequest = async (
  url: string,
  apiKey: string,
): Promise<Response> => {
  const init = {
    method: 'GET',
    headers: {
      accept: 'application/json',
      'X-ArchiveBox-API-Key': apiKey,
    },
  };

  try {
    const res = await fetch(url, init);

    if (res.ok) {
      return res;
    }

    throw new Error(`HTTP error! status: ${res.status}`);
  } catch (err) {
    console.error('Error making GET request:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Fetches archive results from an ArchiveBox-style API and imports them.
 *
 * A fixed list of candidate endpoint paths under `baseUrl` is tried in order.
 * The first one that answers with an OK status and a JSON body wins; its
 * decoded body is passed to `processArchiveResults` and that result is
 * returned. When every endpoint fails, the error of the LAST attempt decides
 * which user-facing message is thrown.
 *
 * @param baseUrl - Server root; trailing `/` and `:` characters are stripped.
 * @param apiKey - Sent both as `X-ArchiveBox-API-Key` and as a Bearer token.
 * @returns The value produced by `processArchiveResults` for the first
 *   successful response.
 * @throws Error describing a network failure, an authentication failure
 *   (401), a missing endpoint (404), or a generic failure that includes the
 *   underlying message.
 */
export const fetchArchiveResults = async (
  baseUrl: string,
  apiKey: string,
): Promise<any> => {
  // Built outside the try block: the catch block below quotes the attempted
  // endpoints in its messages and must be able to see them.
  const cleanBaseUrl = baseUrl.replace(/[\/:]+$/, '');
  const possibleEndpoints = [
    `${cleanBaseUrl}/api/v1/core/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/api/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/archiveresults?limit=200&extractor=title`,
    `${cleanBaseUrl}/api/v1/archiveresults?limit=200&extractor=title`,
  ];

  try {
    console.log('=== API REQUEST DEBUG ===');
    console.log('Original baseUrl:', baseUrl);
    console.log('Cleaned baseUrl:', cleanBaseUrl);
    console.log('Attempting to fetch from endpoints:', possibleEndpoints);
    // Only presence is logged, never the key itself.
    console.log('Using API key:', apiKey ? 'Present' : 'Missing');

    let lastError: Error | null = null;

    for (const endpoint of possibleEndpoints) {
      try {
        console.log(`\n🔗 Trying endpoint: ${endpoint}`);

        const response = await fetch(endpoint, {
          method: 'GET',
          headers: {
            Accept: 'application/json',
            'Content-Type': 'application/json',
            'X-ArchiveBox-API-Key': apiKey,
            Authorization: `Bearer ${apiKey}`,
            'User-Agent': 'PocketDog/1.0',
          },
        });

        console.log(`✅ Response status: ${response.status}`);
        console.log(
          `📋 Response headers:`,
          Object.fromEntries(response.headers.entries()),
        );

        if (!response.ok) {
          const errorText = await response.text();
          console.log(`❌ Error response body: ${errorText}`);
          throw new Error(`HTTP ${response.status}: ${errorText}`);
        }

        const data = await response.json();
        console.log('🎉 Successfully fetched data:', data);

        // Parse the results and download HTML content
        return await processArchiveResults(data, cleanBaseUrl);
      } catch (error) {
        console.log(`❌ Failed to fetch from ${endpoint}:`, error);
        // Keep the original Error object so the TypeError check below still works.
        lastError = error instanceof Error ? error : new Error(String(error));
      }
    }

    // If all endpoints failed, throw the last error
    throw lastError || new Error('All API endpoints failed');
  } catch (error) {
    console.error('🚨 Error fetching archive results:', error);

    const message = error instanceof Error ? error.message : String(error);
    const attempted = possibleEndpoints.join(', ');

    // Browsers/Node report "Failed to fetch"/"fetch failed"; React Native's
    // fetch reports "Network request failed".
    if (
      error instanceof TypeError &&
      /fetch|network request failed/i.test(message)
    ) {
      throw new Error(
        `Network error: Failed to reach ${baseUrl}. Tried endpoints: ${attempted}. Please check your internet connection and ensure the server is running.`,
      );
    }

    // Match the `HTTP <status>:` prefix produced above rather than any "401"/
    // "404" digits that happen to appear in a response body.
    if (message.startsWith('HTTP 401')) {
      throw new Error(
        `Authentication failed: Please check your API key. Tried URL: ${baseUrl}`,
      );
    }

    if (message.startsWith('HTTP 404')) {
      throw new Error(
        `API endpoint not found: Tried multiple endpoints on ${baseUrl}. Attempted URLs: ${attempted}`,
      );
    }

    throw new Error(
      `Failed to fetch archive results from ${baseUrl}. Tried URLs: ${attempted}. Error: ${message}`,
    );
  }
};
|
||||
|
||||
/**
 * Turns an archive-results API payload into locally stored articles.
 *
 * The item list is read from `data.items`, `data.results` or `data.data`
 * (first truthy one). For each item with a timestamp, the Mercury-extracted
 * page at `<baseUrl>/archive/<timestamp>/mercury/content.html` is downloaded
 * and saved via `saveArticleToStorage`. Items are handled one at a time, in
 * order; a failed download is recorded on the item (`downloadError`) and does
 * not stop the run.
 *
 * This function does not reject: unexpected failures are reported through the
 * `error` field of the returned object.
 *
 * @param data - Decoded API response body (shape is not validated).
 * @param baseUrl - Server root without a trailing slash.
 * @returns `data` spread together with `processedItems`, `totalProcessed`,
 *   `downloadedArticles` and `totalDownloaded` (plus `baseUrl` on success or
 *   `error` on failure).
 */
const processArchiveResults = async (data: any, baseUrl: string) => {
  try {
    console.log('📊 Processing archive results...');

    // Check if data has items array
    const items = data.items || data.results || data.data || [];

    if (!Array.isArray(items)) {
      console.log('⚠️ No items array found in response:', data);
      return {
        ...data,
        processedItems: [],
        totalProcessed: 0,
        downloadedArticles: [],
        // Present in every return shape so callers can read it unconditionally.
        totalDownloaded: 0,
      };
    }

    console.log(`📦 Found ${items.length} items to process`);

    const processedItems: any[] = [];
    const downloadedArticles: Article[] = [];

    for (let index = 0; index < items.length; index++) {
      const item = items[index];

      // Extract timestamp from the item
      const timestamp =
        item.snapshot_timestamp ||
        item.timestamp ||
        item.date ||
        item.created_at ||
        item.archived_at;

      if (!timestamp) {
        console.log(`⚠️ No timestamp found for item ${index}:`, item);
        processedItems.push({
          ...item,
          downloadableUrl: null,
          error: 'No timestamp found',
        });
        continue;
      }

      // Create downloadable URL using the format: {base_url}/archive/{timestamp}/mercury/content.html
      const downloadableUrl = `${baseUrl}/archive/${timestamp}/mercury/content.html`;

      console.log(
        `🔗 Item ${index}: ${
          item.title || item.url || 'Untitled'
        } -> ${downloadableUrl}`,
      );

      const processedItem = {
        ...item,
        downloadableUrl,
        originalUrl: item.url || item.original_url || item.link,
        // `output` is preferred over `title` here.
        title: item.output || item.title || item.name || 'Untitled',
        timestamp: timestamp,
      };

      processedItems.push(processedItem);

      // Download the HTML content using snapshot_timestamp
      try {
        console.log(`📥 Downloading HTML for item ${index}...`);
        const htmlContent = await fetchHtmlFromUrl(downloadableUrl);

        // NOTE(review): the id embeds the item's position in this response,
        // so the same snapshot gets a different id if the list order changes
        // between syncs — confirm whether that is intended.
        const article: Article = {
          id: `archive_${timestamp}_${index}`,
          title: processedItem.title,
          url: processedItem.originalUrl,
          htmlContent: htmlContent,
          archivedAt: new Date().toISOString(),
          type: 'html',
          source: 'archive_api',
          timestamp: timestamp,
        };

        // Save to local storage
        await saveArticleToStorage(article);
        downloadedArticles.push(article);

        console.log(
          `✅ Successfully downloaded and saved article: ${article.title}`,
        );
      } catch (downloadError) {
        console.log(
          `❌ Failed to download HTML for item ${index}:`,
          downloadError,
        );
        // processedItem is already in processedItems, so this annotates the result.
        processedItem.downloadError =
          downloadError instanceof Error
            ? downloadError.message
            : String(downloadError);
      }
    }

    console.log(
      `✅ Successfully processed ${processedItems.length} items and downloaded ${downloadedArticles.length} articles`,
    );

    return {
      ...data,
      processedItems,
      totalProcessed: processedItems.length,
      downloadedArticles,
      totalDownloaded: downloadedArticles.length,
      baseUrl: baseUrl,
    };
  } catch (error) {
    console.error('❌ Error processing archive results:', error);
    const message = error instanceof Error ? error.message : String(error);
    return {
      ...data,
      processedItems: [],
      totalProcessed: 0,
      downloadedArticles: [],
      totalDownloaded: 0,
      error: `Processing error: ${message}`,
    };
  }
};
|
||||
|
||||
/**
 * Inserts or updates one article in the `articles` AsyncStorage list.
 *
 * An article whose `id` is already stored is replaced in place; otherwise the
 * article is added to the front of the list. When replacing, the stored
 * `isRead` / `readAt` values are kept unless the incoming article sets its
 * own, so re-importing an article does not silently mark it unread.
 *
 * @param article - Article to persist.
 * @throws Re-throws (after logging) any failure from reading, parsing or
 *   writing the stored list.
 */
const saveArticleToStorage = async (article: Article): Promise<void> => {
  try {
    // Get existing articles
    const existingArticles = await AsyncStorage.getItem('articles');
    const articles: Article[] = existingArticles
      ? JSON.parse(existingArticles)
      : [];

    // Check if article already exists (by ID)
    const existingIndex = articles.findIndex(a => a.id === article.id);

    if (existingIndex >= 0) {
      const previous = articles[existingIndex];
      // Refresh the content but carry the user's read state forward.
      articles[existingIndex] = {
        ...article,
        isRead: article.isRead ?? previous?.isRead,
        readAt: article.readAt ?? previous?.readAt,
      };
    } else {
      // Add new article to the beginning
      articles.unshift(article);
    }

    // Save to storage
    await AsyncStorage.setItem('articles', JSON.stringify(articles));
  } catch (error) {
    console.error('Error saving article to storage:', error);
    throw error;
  }
};
|
||||
|
||||
/**
 * Flags the article with the given id as read and stamps `readAt` with the
 * current time, then writes the whole list back to AsyncStorage.
 *
 * The list is rewritten even when no article matches `articleId`.
 *
 * @param articleId - Identifier of the article to flag.
 * @throws Re-throws (after logging) any failure while loading or writing.
 */
export const markArticleAsRead = async (articleId: string): Promise<void> => {
  try {
    const stored = await loadArticles();
    const next = stored.map(entry =>
      entry.id === articleId
        ? { ...entry, isRead: true, readAt: new Date().toISOString() }
        : entry,
    );
    await AsyncStorage.setItem('articles', JSON.stringify(next));
  } catch (err) {
    console.error('Error marking article as read:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Clears the read flag of the article with the given id, then writes the
 * whole list back to AsyncStorage.
 *
 * The list is rewritten even when no article matches `articleId`.
 *
 * @param articleId - Identifier of the article to flag as unread.
 * @throws Re-throws (after logging) any failure while loading or writing.
 */
export const markArticleAsUnread = async (articleId: string): Promise<void> => {
  try {
    const stored = await loadArticles();
    const next = stored.map(entry =>
      entry.id === articleId
        ? // `readAt: undefined` is omitted by JSON.stringify, so the field
          // disappears from the stored record.
          { ...entry, isRead: false, readAt: undefined }
        : entry,
    );
    await AsyncStorage.setItem('articles', JSON.stringify(next));
  } catch (err) {
    console.error('Error marking article as unread:', err);
    throw err;
  }
};
|
||||
|
||||
/**
 * Counts stored articles by read state.
 *
 * An article counts as read when its `isRead` field is truthy; everything
 * else counts as unread.
 *
 * @returns Totals for all, read and unread articles; all zeros if counting
 *   fails (the failure is logged).
 */
export const getReadStats = async (): Promise<{
  total: number;
  read: number;
  unread: number;
}> => {
  try {
    const all = await loadArticles();

    let read = 0;
    for (const entry of all) {
      if (entry.isRead) {
        read += 1;
      }
    }

    return { total: all.length, read, unread: all.length - read };
  } catch (err) {
    console.error('Error getting read stats:', err);
    return { total: 0, read: 0, unread: 0 };
  }
};
|
||||
Reference in New Issue
Block a user