Mirror of https://github.com/kemko/icecast-ripper.git (synced 2026-01-01 15:55:42 +03:00)
first release
6  .env.example  Normal file
@@ -0,0 +1,6 @@
STREAM_URL=http://example.com/stream
CHECK_INTERVAL=60
RECORD_DIRECTORY=/records
CONNECT_TIMEOUT=10
FIRST_BYTE_TIMEOUT=30
WEB_SERVER_PORT=8080
19  .gitignore  vendored  Normal file
@@ -0,0 +1,19 @@
records

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

*.swp
20  Dockerfile  Normal file
@@ -0,0 +1,20 @@
# Use an official Python runtime as the parent image
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Copy the current directory contents into the container at the working directory
COPY . .

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Make port 8080 available to the world outside this container
EXPOSE 8080

# Define environment variable for record directory
ENV RECORD_DIRECTORY=/records

# Run main.py when the container launches
CMD [ "python", "./src/main.py" ]
22  Makefile  Normal file
@@ -0,0 +1,22 @@
run:
	@echo "Starting Icecast Recorder Service"
	python src/main.py

test:
	@echo "Running tests"
	python -m unittest discover -s tests/

build:
	@echo "Building Docker image for Icecast Recorder Service"
	docker build -t icecast-recorder .

docker-run: build
	@echo "Running Icecast Recorder Service in a Docker container"
	docker run -p 8080:8080 --env-file .env.example icecast-recorder

clean:
	@echo "Cleaning up __pycache__ and .pyc files"
	find . -type d -name __pycache__ -exec rm -r {} +
	find . -type f -name '*.pyc' -delete

.PHONY: run test build docker-run clean
16  docker-compose.yml  Normal file
@@ -0,0 +1,16 @@
version: '3'

services:
  icecast-recorder:
    build: .
    ports:
      - "8080:8080"
    environment:
      - STREAM_URL=http://example.com/stream
      - CHECK_INTERVAL=60
      - RECORD_DIRECTORY=/records
      - CONNECT_TIMEOUT=10
      - FIRST_BYTE_TIMEOUT=30
      - WEB_SERVER_PORT=8080
    volumes:
      - ./records:/records
3  requirements.txt  Normal file
@@ -0,0 +1,3 @@
aiohttp==3.9.1
yattag==1.15.2
python-dotenv==1.0.0
48  src/config.py  Normal file
@@ -0,0 +1,48 @@
import argparse
import os
from dotenv import load_dotenv

# Load .env file if available
load_dotenv()

# Default configuration values
DEFAULTS = {
    'server_host': 'https://example.org',
    'server_port': 8080,
    'stream_url': 'http://example.com/stream',
    'output_directory': './records',
    'check_interval': 60,
    'timeout_connect': 10,
    'timeout_read': 30,
}


def parse_arguments():
    parser = argparse.ArgumentParser(description='Icecast Recorder Service')
    parser.add_argument('--server-host', help='Server host name with protocol')
    parser.add_argument('--server-port', type=int, help='Server port number')
    parser.add_argument('--stream-url', help='URL of the Icecast stream to monitor and record')
    parser.add_argument('--file-url-base', help='Base URL used for constructing file links in the RSS feed')
    parser.add_argument('--output-directory', help='Directory to save the recordings')
    parser.add_argument('--check-interval', type=int, help='Interval to check the stream in seconds')
    parser.add_argument('--timeout-connect', type=int, help='Timeout for connecting to the stream in seconds')
    parser.add_argument('--timeout-read', type=int, help='Read timeout in seconds')
    return vars(parser.parse_args())


def load_configuration():
    cmd_args = parse_arguments()

    # Configuration is established using a priority: CommandLine > EnvironmentVars > Defaults.
    # Environment variable names follow .env.example and docker-compose.yml; numeric values
    # coming from the environment arrive as strings, so they are cast to int here.
    config = {
        'server_host': cmd_args['server_host'] or os.getenv('SERVER_HOST') or DEFAULTS['server_host'],
        'server_port': cmd_args['server_port'] or int(os.getenv('WEB_SERVER_PORT') or DEFAULTS['server_port']),
        'stream_url': cmd_args['stream_url'] or os.getenv('STREAM_URL') or DEFAULTS['stream_url'],
        'output_directory': cmd_args['output_directory'] or os.getenv('RECORD_DIRECTORY') or DEFAULTS['output_directory'],
        'check_interval': cmd_args['check_interval'] or int(os.getenv('CHECK_INTERVAL') or DEFAULTS['check_interval']),
        'timeout_connect': cmd_args['timeout_connect'] or int(os.getenv('CONNECT_TIMEOUT') or DEFAULTS['timeout_connect']),
        'timeout_read': cmd_args['timeout_read'] or int(os.getenv('FIRST_BYTE_TIMEOUT') or DEFAULTS['timeout_read'])
    }

    # Convert the output directory to an absolute path
    config['output_directory'] = os.path.abspath(config['output_directory'])

    return argparse.Namespace(**config)
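A minimal standalone sketch of the same three-tier precedence rule, for illustration only (the resolve helper is not part of the repo):

import os

def resolve(cli_value, env_name, default):
    # Command line beats environment, environment beats default,
    # mirroring the `or` chains in load_configuration()
    return cli_value or os.getenv(env_name) or default

assert resolve(None, 'NO_SUCH_VAR', 60) == 60  # falls through to the default
assert resolve(30, 'NO_SUCH_VAR', 60) == 30    # CLI value wins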
37  src/logger.py  Normal file
@@ -0,0 +1,37 @@
import json
import sys
from datetime import datetime

# Log levels
DEBUG = "DEBUG"
INFO = "INFO"
WARNING = "WARNING"
ERROR = "ERROR"
FATAL = "FATAL"


def log_event(event, details, level=INFO):
    log_entry = {
        "timestamp": datetime.utcnow().isoformat(),
        "event": event,
        "level": level,
        "details": details
    }
    json_log_entry = json.dumps(log_entry)
    print(json_log_entry, file=sys.stdout)  # Write to stdout
    sys.stdout.flush()  # Immediately flush the log entry


# Specific log functions per level for convenience
def log_debug(event, details):
    log_event(event, details, level=DEBUG)


def log_info(event, details):
    log_event(event, details, level=INFO)


def log_warning(event, details):
    log_event(event, details, level=WARNING)


def log_error(event, details):
    log_event(event, details, level=ERROR)


def log_fatal(event, details):
    log_event(event, details, level=FATAL)
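Usage sketch (not part of the commit; assumes src/ is on sys.path). Each helper writes one JSON object per line to stdout, a shape that suits line-delimited log collectors:

from logger import log_info, log_error

log_info("stream_live", {"stream_url": "http://example.com/stream"})
log_error("recording_error", {"error": "connection reset"})
# Example output (timestamp illustrative):
# {"timestamp": "2024-01-01T00:00:00", "event": "stream_live", "level": "INFO", "details": {"stream_url": "http://example.com/stream"}}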
37  src/main.py  Normal file
@@ -0,0 +1,37 @@
import asyncio

from server import start_server
from stream_checker import StreamChecker
from config import load_configuration


async def main():
    # Load configuration from command line arguments and environment variables
    config = load_configuration()

    # Create the StreamChecker instance
    checker = StreamChecker(
        stream_url=config.stream_url,
        check_interval=config.check_interval,
        timeout_connect=config.timeout_connect,
        timeout_read=config.timeout_read,
        output_directory=config.output_directory
    )

    # Run the stream checking/recording loop and the health check and
    # file serving server concurrently in the same event loop
    await asyncio.gather(checker.run(), start_server(config))


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass
67  src/recorder.py  Normal file
@@ -0,0 +1,67 @@
import aiohttp
import os
from datetime import datetime, timedelta
from logger import log_event
from utils import sanitize_filename


class Recorder:
    def __init__(self, stream_url, output_directory, timeout_connect=10, timeout_read=30):
        self.stream_url = stream_url
        self.output_directory = output_directory
        self.timeout_read = timeout_read
        self.timeout_connect = timeout_connect
        self.file_name = None
        self.start_time = None
        self.last_data_time = None
        self.is_recording = False

    async def start_recording(self):
        self.start_time = datetime.utcnow()
        domain = self.stream_url.split("//")[-1].split("/")[0]
        sanitized_domain = sanitize_filename(domain)
        date_str = self.start_time.strftime("%Y%m%d_%H%M%S")
        self.file_name = f"{sanitized_domain}_{date_str}.mp3.tmp"
        self.file_path = os.path.join(self.output_directory, self.file_name)
        try:
            timeout = aiohttp.ClientTimeout(total=None, connect=self.timeout_connect, sock_read=self.timeout_read)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self.stream_url) as response:
                    if response.status == 200:
                        self.is_recording = True
                        log_event("recording_started", {"file_name": self.file_name, "stream_url": self.stream_url})
                        async for data, _ in response.content.iter_chunks():
                            if not data:
                                break
                            now = datetime.utcnow()
                            # Check whether the gap since the previous chunk exceeded the read timeout
                            if self.last_data_time and now - self.last_data_time > timedelta(seconds=self.timeout_read):
                                log_event("timeout_exceeded", {
                                    "stream_url": self.stream_url,
                                    "elapsed_seconds": (now - self.last_data_time).total_seconds()
                                }, level="WARNING")
                                break
                            self.last_data_time = now
                            with open(self.file_path, 'ab') as f:
                                f.write(data)

                        log_event("recording_finished", {"file_name": self.file_name, "stream_url": self.stream_url})
                    else:
                        log_event("stream_unavailable", {"http_status": response.status})
        except Exception as e:
            log_event('recording_error', {"error": str(e)}, level="ERROR")
        finally:
            self.is_recording = False
            self.end_recording()

    def end_recording(self):
        if os.path.exists(self.file_path):
            finished_file = self.file_path.replace('.tmp', '')
            os.rename(self.file_path, finished_file)
            log_event("recording_saved", {
                "file_name": finished_file,
                "duration": (datetime.utcnow() - self.start_time).total_seconds() if self.start_time else 0
            })

    def is_active(self):
        return self.is_recording
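Usage sketch (not part of the commit; the stream URL is a placeholder and src/ is assumed on sys.path). Recordings land in the output directory as .mp3.tmp while in progress and are renamed to .mp3 by end_recording():

import asyncio
import os
from recorder import Recorder

os.makedirs("./records", exist_ok=True)  # Recorder appends to files here
rec = Recorder("http://example.com/stream", "./records", timeout_connect=10, timeout_read=30)
asyncio.run(rec.start_recording())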
38  src/rss_generator.py  Normal file
@@ -0,0 +1,38 @@
from yattag import Doc
import os
from utils import generate_file_hash, file_hash_to_id
from datetime import datetime


def generate_rss_feed(files, output_directory, server_host):
    doc, tag, text = Doc().tagtext()

    doc.asis('<?xml version="1.0" encoding="UTF-8"?>')
    with tag('rss', version='2.0'):
        with tag('channel'):
            with tag('title'):
                text('Icecast Stream Recordings')
            with tag('description'):
                text('The latest recordings from the Icecast server.')
            with tag('link'):
                text(server_host)

            for file_name in files:
                file_path = os.path.join(output_directory, file_name)
                file_hash = generate_file_hash(file_path)
                file_id = file_hash_to_id(file_hash)

                with tag('item'):
                    with tag('title'):
                        text(file_name)
                    with tag('link'):
                        text(f'{server_host}/files/{file_name}')
                    with tag('guid', isPermaLink='false'):
                        text(file_id)
                    with tag('pubDate'):
                        pub_date = datetime.utcfromtimestamp(os.path.getctime(file_path)).strftime('%a, %d %b %Y %H:%M:%S UTC')
                        text(pub_date)

    return doc.getvalue()
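Usage sketch (not part of the commit; creates an empty placeholder file so generate_file_hash and os.path.getctime have something to read):

import os
from rss_generator import generate_rss_feed

os.makedirs("./records", exist_ok=True)
open("./records/example.com_20240101_000000.mp3", "wb").close()
print(generate_rss_feed(["example.com_20240101_000000.mp3"], "./records", "https://example.org"))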
59  src/server.py  Normal file
@@ -0,0 +1,59 @@
from aiohttp import web
import os
import mimetypes
from rss_generator import generate_rss_feed
from logger import log_event
from pathlib import Path

routes = web.RouteTableDef()


@routes.get('/health')
async def health_check(request):
    log_event("health_check_requested", {"method": "GET", "path": request.path}, level="INFO")
    return web.Response(text="OK")


@routes.get('/rss')
async def rss_feed(request):
    log_event("rss_feed_requested", {"method": "GET", "path": request.path}, level="INFO")
    output_directory = request.app['config'].output_directory
    files = [f for f in os.listdir(output_directory) if f.endswith('.mp3')]
    rss_xml = generate_rss_feed(files, output_directory, request.app['config'].server_host)
    return web.Response(text=rss_xml, content_type='application/rss+xml')


@routes.get('/files/{file_name}')
async def serve_file(request):
    file_name = request.match_info['file_name']
    log_event("file_serve_requested", {"method": "GET", "path": request.path, "file_name": file_name}, level="INFO")

    output_directory = request.app['config'].output_directory
    file_path = os.path.join(output_directory, file_name)

    # Reject names that resolve outside the output directory (path traversal)
    resolved = Path(output_directory).joinpath(file_name).resolve()
    if not resolved.is_relative_to(Path(output_directory).resolve()):
        log_event("file_access_denied", {"file_name": file_name}, level="WARNING")
        return web.Response(status=403, text='Access denied')

    if not os.path.exists(file_path):
        log_event("file_not_found", {"file_name": file_name}, level="WARNING")
        return web.Response(status=404, text='File not found')

    file = os.path.basename(file_path)
    content_type, _ = mimetypes.guess_type(file)

    headers = {
        'Content-Disposition': f'attachment; filename="{file}"',
        'Content-Type': content_type or 'application/octet-stream',
    }
    return web.FileResponse(file_path, headers=headers)


async def start_server(config):
    app = web.Application()
    app['config'] = config
    app.add_routes(routes)
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, '0.0.0.0', config.server_port)
    log_event('server_starting', {'port': config.server_port}, level="INFO")
    await site.start()
    log_event('server_started', {'port': config.server_port}, level="INFO")
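The traversal guard in serve_file relies on pathlib's is_relative_to (Python 3.9+, so available on the python:3.11-slim base image). A standalone sketch of the check with illustrative paths:

from pathlib import Path

base = Path("/records").resolve()
candidate = (base / "../etc/passwd").resolve()  # escapes the base directory
print(candidate.is_relative_to(base))           # False, so the request would get a 403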
42  src/stream_checker.py  Normal file
@@ -0,0 +1,42 @@
import asyncio
from aiohttp import ClientSession, ClientTimeout
from recorder import Recorder
from logger import log_event


class StreamChecker:
    def __init__(self, stream_url, check_interval, timeout_connect, output_directory, timeout_read=30):
        self.stream_url = stream_url
        self.check_interval = check_interval
        self.timeout_connect = timeout_connect
        self.timeout_read = timeout_read
        self.output_directory = output_directory
        self.recorder = None
        self.is_stream_live = False

    async def check_stream(self, session):
        try:
            timeout = ClientTimeout(connect=self.timeout_connect)
            async with session.get(self.stream_url, timeout=timeout, allow_redirects=True) as response:
                if response.status == 200:
                    self.is_stream_live = True
                    log_event("stream_live", {"stream_url": self.stream_url})
                else:
                    self.is_stream_live = False
                    log_event("stream_offline", {"stream_url": self.stream_url})
        except asyncio.TimeoutError:
            log_event("check_stream_timeout", {"stream_url": self.stream_url})
        except Exception as e:
            log_event("check_stream_error", {"stream_url": self.stream_url, "error": str(e)})

    async def run(self):
        while True:
            async with ClientSession() as session:
                await self.check_stream(session)

            if self.is_stream_live and (self.recorder is None or not self.recorder.is_active()):
                self.recorder = Recorder(self.stream_url, self.output_directory,
                                         timeout_connect=self.timeout_connect,
                                         timeout_read=self.timeout_read)
                await self.recorder.start_recording()

            await asyncio.sleep(self.check_interval)
27  src/utils.py  Normal file
@@ -0,0 +1,27 @@
import hashlib
import string


def sanitize_filename(filename):
    """
    Sanitize the filename by removing or replacing invalid characters.
    """
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
    cleaned_filename = "".join(c for c in filename if c in valid_chars)
    cleaned_filename = cleaned_filename.replace(' ', '_')  # Replace spaces with underscores
    return cleaned_filename


def generate_file_hash(file_path):
    """
    Generate a hash for file contents to uniquely identify files.
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        while chunk := f.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest()


def file_hash_to_id(file_hash, length=32):
    """
    Convert a file hash to a shorter file ID, keeping only the first `length` characters.
    """
    return file_hash[:length]
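Usage sketch (not part of the commit; assumes src/ is on sys.path):

from utils import sanitize_filename, file_hash_to_id

print(sanitize_filename("radio.example.com:8000"))  # 'radio.example.com8000' (colon stripped)
print(file_hash_to_id("a" * 64, length=8))          # 'aaaaaaaa'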